From 567086469e95df7cad18244a65d4e9ef62dccc77 Mon Sep 17 00:00:00 2001 From: Ben Schmidt Date: Wed, 18 Dec 2024 16:51:22 -0500 Subject: [PATCH] webGPU pipelines --- package-lock.json | 7 +- package.json | 2 +- src/webGPU/buffertools.ts | 220 +++++++++++------------ src/webGPU/forests.ts | 107 ++++++------ src/webGPU/lib.ts | 356 +++++++++++++++++++++++--------------- 5 files changed, 385 insertions(+), 307 deletions(-) diff --git a/package-lock.json b/package-lock.json index de69b85e..ef7cdbd0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "deepscatter", - "version": "3.0.0-next.43", + "version": "3.0.0-next.45", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "deepscatter", - "version": "3.0.0-next.43", + "version": "3.0.0-next.45", "license": "CC BY-NC-SA 4.0", "dependencies": { "d3-array": "^3.2.4", @@ -59,7 +59,7 @@ "vitest": "^2.1.4" }, "peerDependencies": { - "apache-arrow": ">=11.0.0" + "apache-arrow": "^17.0.0" } }, "node_modules/@75lb/deep-merge": { @@ -1193,6 +1193,7 @@ "version": "17.0.0", "resolved": "https://registry.npmjs.org/apache-arrow/-/apache-arrow-17.0.0.tgz", "integrity": "sha512-X0p7auzdnGuhYMVKYINdQssS4EcKec9TCXyez/qtJt32DrIMGbzqiaMiQ0X6fQlQpw8Fl0Qygcv4dfRAr5Gu9Q==", + "license": "Apache-2.0", "peer": true, "dependencies": { "@swc/helpers": "^0.5.11", diff --git a/package.json b/package.json index 250ec588..e46584a0 100644 --- a/package.json +++ b/package.json @@ -45,7 +45,7 @@ }, "homepage": "https://github.com/nomic-ai/deepscatter#readme", "peerDependencies": { - "apache-arrow": ">=11.0.0" + "apache-arrow": "^17.0.0" }, "dependencies": { "d3-array": "^3.2.4", diff --git a/src/webGPU/buffertools.ts b/src/webGPU/buffertools.ts index b980ad08..06ecefd6 100644 --- a/src/webGPU/buffertools.ts +++ b/src/webGPU/buffertools.ts @@ -1,127 +1,133 @@ import { isTypedArray, type TypedArray } from 'webgpu-utils'; import { BufferSet } from '../regl_rendering'; import { WebGPUBufferLocation } from '../types'; -// I track locations on buffers like this. -// We keep track of both size -- the number of meaningful data bytes -// and paddedSize -- the number of bytes including 256-byte padding. - -export class WebGPUBufferSet extends BufferSet { - // Copied with alterations from deepscatter +import { Some, TupleMap } from '../utilityFunctions'; - // An abstraction creating an expandable set of buffers that can be subdivided - // to put more than one variable on the same - // block of memory. Reusing buffers this way can have performance benefits over allocating - // multiple different buffers for each small block used. +// Unlike in webgl, we keep track of both size -- the number of meaningful data bytes +// and paddedSize -- the number of bytes including 256-byte padding. - // The general purpose here is to call 'allocate_block' that releases a block of memory - // to use in creating a new array to be passed to regl. +export class WebGPUBufferSet extends BufferSet< + GPUBuffer, + WebGPUBufferLocation +> { + public device: GPUDevice; + private stagingBuffer: GPUBuffer; + public usage: number; - public device: GPUDevice; - private stagingBuffer: GPUBuffer; - public usage: number; + public store: TupleMap = new TupleMap(); - public store: Map = new Map(); + /** + * + * @param regl the Regl context we're using. + * @param buffer_size The number of bytes on each strip of memory that we'll ask for. + */ - /** - * - * @param regl the Regl context we're using. - * @param buffer_size The number of bytes on each strip of memory that we'll ask for. - */ + constructor( + device: GPUDevice, + buffer_size: number, + usage: number = GPUBufferUsage.STORAGE | + GPUBufferUsage.COPY_DST | + GPUBufferUsage.COPY_SRC, + ) { + super(buffer_size); + this.device = device; + // Track the ends in case we want to allocate smaller items. + this.usage = usage; + this.generate_new_buffer(); + this.stagingBuffer = device.createBuffer({ + size: buffer_size, + usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.MAP_WRITE, + mappedAtCreation: false, // saves a little trouble in the passThrough function + }); + } - constructor( - device: GPUDevice, - buffer_size: number, - usage: number = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC - ) { - super(buffer_size) - this.device = device; - // Track the ends in case we want to allocate smaller items. - this.usage = usage; - this.generate_new_buffer(); - this.stagingBuffer = device.createBuffer({ - size: buffer_size, - usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.MAP_WRITE, - mappedAtCreation: false // saves a little trouble in the passThrough function - }); - } + private async passThroughStagingBuffer( + values: Uint32Array, + bufferLocation: WebGPUBufferLocation, + ) { + // WebGPU + const { buffer, offset, paddedSize } = bufferLocation; + while (this.stagingBuffer.mapState !== 'unmapped') { + // Wait in line for a millisecond. + // Would be better to hold a queue and apply more than one of these at once. + await new Promise((resolve) => setTimeout(resolve, 1)); + } + await this.stagingBuffer.mapAsync(GPUMapMode.WRITE, 0, paddedSize); + new Uint32Array( + this.stagingBuffer.getMappedRange(0, values.byteLength), + ).set(values); + this.stagingBuffer.unmap(); + const commandEncoder = this.device.createCommandEncoder(); + commandEncoder.copyBufferToBuffer( + this.stagingBuffer, + 0, + buffer, + offset, + paddedSize, + ); + this.device.queue.submit([commandEncoder.finish()]); + } - private async passThroughStagingBuffer(values: Uint32Array, bufferLocation: WebGPUBufferLocation) { - // WebGPU - const { buffer, offset, paddedSize } = bufferLocation; - while (this.stagingBuffer.mapState !== 'unmapped') { - // Wait in line for a millisecond. - // Would be better to hold a queue and apply more than one of these at once. - await new Promise((resolve) => setTimeout(resolve, 1)); - } - await this.stagingBuffer.mapAsync(GPUMapMode.WRITE, 0, paddedSize); - new Uint32Array(this.stagingBuffer.getMappedRange(0, values.byteLength)).set(values); - this.stagingBuffer.unmap(); - const commandEncoder = this.device.createCommandEncoder(); - commandEncoder.copyBufferToBuffer(this.stagingBuffer, 0, buffer, offset, paddedSize); - this.device.queue.submit([commandEncoder.finish()]); - } + register(k: Some, v: WebGPUBufferLocation) { + this.store.set(k, v); + } - register(k: string, v: WebGPUBufferLocation) { - this.store.set(k, v); - } + async set(key: Some, value: TypedArray) { + if (this.store.has(key)) { + throw new Error(`Key ${key.join(', ')} already exists in buffer set.`); + } + const size = value.byteLength; + const paddedSize = Math.ceil(size / 256) * 256; - async set(key: string, value: TypedArray) { - if (this.store.has(key)) { - throw new Error(`Key ${key} already exists in buffer set.`); - } - const size = value.byteLength; - const paddedSize = Math.ceil(size / 256) * 256; + const { buffer, offset } = this.allocate_block(paddedSize); - const { buffer, offset } = this.allocate_block(paddedSize); + // If it's a typed array, we can just copy it directly. + // cast it to uint32array + const v2 = value; + const data = new Uint32Array(v2.buffer, v2.byteOffset, v2.byteLength / 4); + const description = { buffer, offset, size, paddedSize }; + await this.passThroughStagingBuffer(data, description); + this.register(key, description); + } - // If it's a typed array, we can just copy it directly. - // cast it to uint32array - const v2 = value; - const data = new Uint32Array(v2.buffer, v2.byteOffset, v2.byteLength / 4); - const description = { buffer, offset, size, paddedSize }; - await this.passThroughStagingBuffer(data, description); - this.register(key, description); - } + _create_buffer(): GPUBuffer { + return this.device.createBuffer({ + size: this.buffer_size, + usage: this.usage, + mappedAtCreation: false, + }); + } - _create_buffer() : GPUBuffer { - return this.device.createBuffer({ - size: this.buffer_size, - usage: this.usage, - mappedAtCreation: false - }) - } - - _create_leftover_buffer() : WebGPUBufferLocation { - return { - buffer: this.buffers[0], - offset: this.pointer, - stride: 4, // meaningless here. - byte_size: this.buffer_size - this.pointer, - paddedSize: this.buffer_size - this.pointer - } - } + _create_leftover_buffer(): WebGPUBufferLocation { + return { + buffer: this.buffers[0], + offset: this.pointer, + stride: 4, // meaningless here. + byte_size: this.buffer_size - this.pointer, + paddedSize: this.buffer_size - this.pointer, + }; + } } - export function createSingletonBuffer( - device: GPUDevice, - data: Uint32Array | Int32Array | Float32Array | ArrayBuffer, - usage: number + device: GPUDevice, + data: Uint32Array | Int32Array | Float32Array | ArrayBuffer, + usage: number, ): GPUBuffer { - // Creates a disposable singleton buffer. - // ReadonlyBufferSet ought to provide better performance; but - // this allows more different buffer sizes and easier destruction. - const buffer = device.createBuffer({ - size: data.byteLength, - usage, - mappedAtCreation: true - }); - const mappedRange = buffer.getMappedRange(); - if (isTypedArray(data)) { - new Uint32Array(mappedRange).set(data as TypedArray); - } else { - new Uint32Array(mappedRange).set(new Uint32Array(data as ArrayBuffer)); - } - buffer.unmap(); - return buffer; + // Creates a disposable singleton buffer. + // ReadonlyBufferSet ought to provide better performance; but + // this allows more different buffer sizes and easier destruction. + const buffer = device.createBuffer({ + size: data.byteLength, + usage, + mappedAtCreation: true, + }); + const mappedRange = buffer.getMappedRange(); + if (isTypedArray(data)) { + new Uint32Array(mappedRange).set(data as TypedArray); + } else { + new Uint32Array(mappedRange).set(new Uint32Array(data as ArrayBuffer)); + } + buffer.unmap(); + return buffer; } diff --git a/src/webGPU/forests.ts b/src/webGPU/forests.ts index 2c24626f..cf26ed94 100644 --- a/src/webGPU/forests.ts +++ b/src/webGPU/forests.ts @@ -1,5 +1,6 @@ -import { createSingletonBuffer, WebGPUBufferSet } from "./buffertools"; -import { StatefulGPU } from "./lib"; +import { Deeptable } from '../deepscatter'; +import { createSingletonBuffer, WebGPUBufferSet } from './buffertools'; +import { DeepGPU } from './lib'; type TinyForestParams = { nTrees: number; @@ -7,28 +8,31 @@ type TinyForestParams = { // The number of features to consider at each split. maxFeatures: number; D: number; -} +}; -const defaultTinyForestParams : TinyForestParams = { +const defaultTinyForestParams: TinyForestParams = { nTrees: 128, depth: 8, maxFeatures: 32, D: 768, -} +}; -export class TinyForest extends StatefulGPU { +export class TinyForest extends DeepGPU { params: TinyForestParams; - + private _bootstrapSamples?: GPUBuffer; // On the order of 100 KB - protected _forests?: GPUBuffer // On the order of 10 MB. + protected _forests?: GPUBuffer; // On the order of 10 MB. // private trainedThrough: number = 0; constructor( - device: GPUDevice, - bufferSize = 1024 * 1024 * 256, - t: Partial = {}) { - super(device, bufferSize) - this.params = {...defaultTinyForestParams, ...t} - this.initializeForestsToZero() + device: GPUDevice, + bufferSize = 1024 * 1024 * 256, + t: Partial = {}, + deeptable: Deeptable + ) { + throw new Error("Not implemented") + super(device, deeptable); + this.params = { ...defaultTinyForestParams, ...t }; + this.initializeForestsToZero(); this.bufferSet = new WebGPUBufferSet(device, bufferSize); } @@ -48,48 +52,51 @@ export class TinyForest extends StatefulGPU { // features buffer; binding: 0, visibility: GPUShaderStage.COMPUTE, - buffer: { type: 'storage' } + buffer: { type: 'storage' }, }, { // dims to check array; binding: 1, visibility: GPUShaderStage.COMPUTE, - buffer: { type: 'storage' } + buffer: { type: 'storage' }, }, { // output count buffer. binding: 2, visibility: GPUShaderStage.COMPUTE, - buffer: { type: 'storage' } - } - ] - }) + buffer: { type: 'storage' }, + }, + ], + }); // const subsetsToCheck = this.chooseNextFeatures(); - const pipelineLayout = device.createPipelineLayout({ bindGroupLayouts: [layout] }); + const pipelineLayout = device.createPipelineLayout({ + bindGroupLayouts: [layout], + }); - const shaderModule = device.createShaderModule({ code: ` + const shaderModule = device.createShaderModule({ + code: ` @group(0) @binding(0) var features: array; @group(0) @binding(1) var dimsToCheck: array; @group(0) @binding(2) var counts: array; @compute @workgroup_size(64) //TODOD HERE - ` }); - + `, + }); return device.createComputePipeline({ layout: pipelineLayout, compute: { module: shaderModule, - entryPoint: 'main' - } + entryPoint: 'main', + }, }); } //@ts-expect-error foo private chooseNextFeatures(n = 32) { - console.log({n}) + console.log({ n }); const { maxFeatures, nTrees, D } = this.params; const features = new Uint16Array(maxFeatures * D); for (let i = 0; i < nTrees; i++) { @@ -100,71 +107,59 @@ export class TinyForest extends StatefulGPU { const arr = new Uint16Array([...set].sort()); features.set(arr, i * maxFeatures); } - return createSingletonBuffer( - this.device, - features, - GPUBufferUsage.STORAGE - ) + return createSingletonBuffer(this.device, features, GPUBufferUsage.STORAGE); } - - initializeForestsToZero() { - // Each tree is a set of bits; For every possible configuration - // the first D indicating + // Each tree is a set of bits; For every possible configuration + // the first D indicating // the desired outcome for the dimension, // the second D indicating whether the bits in those // positions are to be considered in checking if the tree // fits. There are 2**depth bitmasks for each dimension--each point // will match only one, and part of the inference task is determining which one. - const treeSizeInBytes = - 2 * this.params.D * (2 ** this.params.depth) / 8; + const treeSizeInBytes = (2 * this.params.D * 2 ** this.params.depth) / 8; - const data = new Uint8Array(treeSizeInBytes * this.params.nTrees) + const data = new Uint8Array(treeSizeInBytes * this.params.nTrees); this._forests = createSingletonBuffer( this.device, data, - GPUBufferUsage.STORAGE - ) + GPUBufferUsage.STORAGE, + ); } - // Rather than actually bootstrap, we generate a single // list of 100,000 numbers drawn from a poisson distribution. - // These serve as weights for draws with replacement; to + // These serve as weights for draws with replacement; to // bootstrap any given record batch, we take a sequence of - // numbers from the buffer with offset i. + // numbers from the buffer with offset i. get bootstrapSamples() { if (this._bootstrapSamples) { - return this._bootstrapSamples + return this._bootstrapSamples; } else { - const arr = new Uint8Array(100000) + const arr = new Uint8Array(100000); for (let i = 0; i < arr.length; i++) { - arr[i] = poissonRandomNumber() + arr[i] = poissonRandomNumber(); } this._bootstrapSamples = createSingletonBuffer( this.device, arr, - GPUBufferUsage.STORAGE - ) - return this._bootstrapSamples + GPUBufferUsage.STORAGE, + ); + return this._bootstrapSamples; } } - - } - -function poissonRandomNumber() : number { +function poissonRandomNumber(): number { let p = 1.0; let k = 0; do { k++; p *= Math.random(); - } while (p > 1/Math.E); + } while (p > 1 / Math.E); return k - 1; } - diff --git a/src/webGPU/lib.ts b/src/webGPU/lib.ts index 4ad88694..e1cf03b7 100644 --- a/src/webGPU/lib.ts +++ b/src/webGPU/lib.ts @@ -1,135 +1,204 @@ import { makeShaderDataDefinitions, makeStructuredView } from 'webgpu-utils'; import { WebGPUBufferSet, createSingletonBuffer } from './buffertools'; -import { Scatterplot, Tile } from '../deepscatter'; - -export class StatefulGPU { - device: GPUDevice; - bufferSet: WebGPUBufferSet; - constructor(device: GPUDevice, bufferSize = 1024 * 1024 * 256) { - this.device = device; - this.bufferSet = new WebGPUBufferSet(device, bufferSize); +import { Deeptable, Scatterplot, Tile } from '../deepscatter'; +import { Bool, Vector, vectorFromArray } from 'apache-arrow'; + +export class DeepGPU { + // This is a stateful class for bundling together GPU buffers and resources. + // It's sort of replacing regl? I don't know yet, just feeling this out. + device: GPUDevice; + bufferSet: WebGPUBufferSet; + deeptable: Deeptable; + + /** + * Create a DeepGPU synchronously. Usually call DeepGPU.create() + * + * @param device The initialized + * @param bufferSize + */ + constructor( + device: GPUDevice, + deeptable: Deeptable, + bufferSize = 1024 * 1024 * 256, + ) { + this.device = device; + this.deeptable = deeptable; + this.bufferSet = new WebGPUBufferSet(device, bufferSize); + } + + static async create(deeptable: Deeptable): Promise { + // Create a DeepGPU object. + if (!navigator.gpu) { + throw new Error('WebGPU is not supported in this browser.'); + } + + const adapter = await navigator.gpu.requestAdapter(); + if (!adapter) { + throw new Error('Failed to get GPU adapter.'); + } + + const device = await adapter.requestDevice(); + return new DeepGPU(device, deeptable); + } + + async get(field: string, tile: Tile) { + if (this.bufferSet.store.has([field, tile.key])) { + return this.bufferSet.store.get([field, tile.key]) + } else { + const values = (await tile.get_column(field)).data[0].children[0] + .values as Uint8Array; + await this.bufferSet.set([field, tile.key], values); + return this.bufferSet.store.get([field, tile.key]) + } + } +} + + + + +export abstract class ReusableWebGPUPipeline { + public gpuState: DeepGPU + constructor( + gpuState: DeepGPU, + ) { + this.gpuState = gpuState } - static async initializeWebGPU(): Promise { - if (!navigator.gpu) { - throw new Error('WebGPU is not supported in this browser.'); + abstract shaderCode() : string; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + abstract uniforms(): Record; + protected uniformBuffer?: GPUBuffer; + protected pipeline?: GPUComputePipeline; +} + +export class HammingPipeline extends ReusableWebGPUPipeline { + public gpuState: DeepGPU; + public dimensionality? : number; + public comparisonBuffer: GPUBuffer; + private fieldName = '_hamming_embeddings'; + constructor( + gpuState: DeepGPU, + ) { + super(gpuState) + } + + bindGroupLayout(device: GPUDevice) { + return device.createBindGroupLayout({ + entries: [ + { + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'read-only-storage' }, + }, + { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'read-only-storage' }, + }, + { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'storage' }, + }, + { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'uniform' }, + }, + ], + }); + } + + shaderCode() { + return ` + struct SizeEtc { + objectSize: u32, + }; + + @group(0) @binding(0) var comparisonArray : array; + @group(0) @binding(1) var matrixArray : array; + @group(0) @binding(2) var outputArray : array; + @group(0) @binding(3) var myUniforms: SizeEtc; + + @compute @workgroup_size(64) + fn main(@builtin(global_invocation_id) global_id : vec3) { + let idx = global_id.x; + let o = myUniforms.objectSize; + if (idx < arrayLength(&matrixArray)) { + var totalDistance: u32 = 0; + for (var i: u32 = 0; i < o; i = i + 1) { + for (var j: u32 = 0; j < arrayLength(&comparisonArray) / o; j = j + 1) { + totalDistance = totalDistance + countOneBits(comparisonArray[j * o + i] ^ matrixArray[idx * o + i]); + } + } + outputArray[global_id.x] = totalDistance; + } } - - const adapter = await navigator.gpu.requestAdapter(); - if (!adapter) { - throw new Error('Failed to get GPU adapter.'); + `} + + setComparisonArray( + arr: Vector + ) { + const underlying = arr.data[0].values; + this.comparisonBuffer = createSingletonBuffer( + this.gpuState.device, + underlying, + GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + ); + this.dimensionality = underlying.length; + } + + uniforms() { + return { + objectSize: this.dimensionality / 32, } + } + + prepUniforms() { + const defs = makeShaderDataDefinitions(this.shaderCode()); + + const myUniformValues = makeStructuredView(defs.uniforms.myUniforms); - const device = await adapter.requestDevice(); - return new StatefulGPU(device); + myUniformValues.set(this.uniforms()); + return myUniformValues; } -} -const bindGroupLayout = (device: GPUDevice) => - device.createBindGroupLayout({ - entries: [ - { - binding: 0, - visibility: GPUShaderStage.COMPUTE, - buffer: { type: 'read-only-storage' } - }, - { - binding: 1, - visibility: GPUShaderStage.COMPUTE, - buffer: { type: 'read-only-storage' } - }, - { - binding: 2, - visibility: GPUShaderStage.COMPUTE, - buffer: { type: 'storage' } + prep() { + const { device } = this.gpuState; + const layout = device.createPipelineLayout({ + bindGroupLayouts: [this.bindGroupLayout(device)], + }); + // Create shader module and pipeline + const shaderModule = device.createShaderModule({ code: this.shaderCode() }); + this.pipeline = device.createComputePipeline({ + layout, + compute: { + module: shaderModule, + entryPoint: 'main', }, - { - binding: 3, - visibility: GPUShaderStage.COMPUTE, - buffer: { type: 'uniform' } - } - ] - }); - - export function prepareComputeShader( - state: StatefulGPU, - comparisonArray: Uint32Array, - embeddingSize: number = 128 -): (tile, key) => Promise { - // Create buffers - const { device, bufferSet } = state; - const comparisonBuffer = createSingletonBuffer( - device, - comparisonArray, - GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC - ); - - // Shader code - const shaderCode = ` - - struct SizeEtc { - objectSize: u32, - }; - - @group(0) @binding(0) var comparisonArray : array; - @group(0) @binding(1) var matrixArray : array; - @group(0) @binding(2) var outputArray : array; - @group(0) @binding(3) var myUniforms: SizeEtc; - - @compute @workgroup_size(64) - fn main(@builtin(global_invocation_id) global_id : vec3) { - let idx = global_id.x; - let o = myUniforms.objectSize; - if (idx < arrayLength(&matrixArray)) { - var totalDistance: u32 = 0; - for (var i: u32 = 0; i < o; i = i + 1) { - for (var j: u32 = 0; j < arrayLength(&comparisonArray) / o; j = j + 1) { - totalDistance = totalDistance + countOneBits(comparisonArray[j * o + i] ^ matrixArray[idx * o + i]); - } - } - outputArray[global_id.x] = totalDistance; - } - } -`; - - const defs = makeShaderDataDefinitions(shaderCode); - const myUniformValues = makeStructuredView(defs.uniforms.myUniforms); - myUniformValues.set({ - objectSize: embeddingSize / 32 - }); - const layout = device.createPipelineLayout({ bindGroupLayouts: [bindGroupLayout(device)] }); - // Create shader module and pipeline - const shaderModule = device.createShaderModule({ code: shaderCode }); - const pipeline = device.createComputePipeline({ - layout, - compute: { - module: shaderModule, - entryPoint: 'main' - } - }); - const uniformBuffer = createSingletonBuffer( - device, - myUniformValues.arrayBuffer, - GPUBufferUsage.UNIFORM - ); - - const run = async function (tile: Tile, fieldName: string) { + }); + this.uniformBuffer = createSingletonBuffer( + device, + this.prepUniforms().arrayBuffer, + GPUBufferUsage.UNIFORM, + ); + } + + async runOnTile(tile: Tile) { + const { comparisonBuffer, fieldName, pipeline, uniformBuffer, dimensionality: embeddingSize } = this; + const { device } = this.gpuState; const commandEncoder = device.createCommandEncoder(); - const key = `${tile.key}_${fieldName}`; - if (!bufferSet.store.has(key)) { - const values = (await tile.get_column(fieldName)).data[0].children[0].values as Uint8Array; - await bufferSet.set(key, values); - } - const { buffer, offset, byte_size: size } = bufferSet.store.get(key); + + const { buffer, offset, byte_size: size } = await this.gpuState.get(fieldName, tile) const outputSize = (size / embeddingSize) * 8; const paddedSize = Math.ceil(outputSize / 4) * 4; - + // TODO this should be a permanent buffer. const outputBuffer = device.createBuffer({ // Put a ceiling on it. size: paddedSize * 4, - usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.STORAGE + usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.STORAGE, }); - + const passEncoder = commandEncoder.beginComputePass(); passEncoder.setPipeline(pipeline); passEncoder.setBindGroup( @@ -140,55 +209,62 @@ const bindGroupLayout = (device: GPUDevice) => { binding: 0, resource: { buffer: comparisonBuffer } }, { binding: 1, resource: { buffer, offset, size } }, { binding: 2, resource: { buffer: outputBuffer } }, - { binding: 3, resource: { buffer: uniformBuffer } } - ] - }) + { binding: 3, resource: { buffer: uniformBuffer } }, + ], + }), ); - + passEncoder.dispatchWorkgroups(size / 4 / 64); passEncoder.end(); // Submit the commands const gpuReadBuffer = device.createBuffer({ size: paddedSize * 4, - usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, }); - commandEncoder.copyBufferToBuffer(outputBuffer, 0, gpuReadBuffer, 0, paddedSize * 4); + commandEncoder.copyBufferToBuffer( + outputBuffer, + 0, + gpuReadBuffer, + 0, + paddedSize * 4, + ); device.queue.submit([commandEncoder.finish()]); // Read back the results await gpuReadBuffer.mapAsync(GPUMapMode.READ); const outputArray = new Uint32Array(gpuReadBuffer.getMappedRange()); - return outputArray.slice(0, outputSize); - }; - return run; + const usable = outputArray.slice(0, outputSize); + const returnVal = new Float32Array(usable.length) + for (let i = 0; i < returnVal.length; i++) { + returnVal[i] = usable[i] / embeddingSize // (originally this was squared??) + } + return vectorFromArray(returnVal) + } } - + // hide the state in a global variable. -const dumb: StatefulGPU[] = []; +const dumb: DeepGPU[] = []; export async function create_hamming_transform( scatterplot: Scatterplot, id: string, - view: Uint8Array, - dims: number, - column: string + view: Vector, ) { if (dumb.length === 0) { - dumb.push(await StatefulGPU.initializeWebGPU()); + dumb.push(await DeepGPU.create(scatterplot.deeptable)); } if (scatterplot.dataset.transformations[id] !== undefined) { return; } - // Cast from int8 to int32 - const comparisonArray = new Uint32Array(view.buffer); - const run = prepareComputeShader(dumb[0], comparisonArray, dims); - - scatterplot.dataset.transformations[id] = async function (tile) { - const value = await run(tile, column); - const scaled = [...value].map((d) => d / ( comparisonArray.length * 32 / dims)); - return new Float32Array(scaled) - }; - await scatterplot.dataset.root_tile.get_column(id); + + const [gpuState] = dumb; + const pipeline = new HammingPipeline(gpuState); + pipeline.setComparisonArray(view) + pipeline.prep(); + + scatterplot.dataset.transformations[id] = (tile) => pipeline.runOnTile(tile) } + +