Add a CPU variant of autoencoder

BrainJS · Jun 18, 2024 · 00b8515 · 00b8515
1 parent 8f8f455
commit 00b8515
Show file tree

Hide file tree

Showing 5 changed files with 325 additions and 15 deletions.
diff --git a/src/autoencoder-gpu.test.ts b/src/autoencoder-gpu.test.ts
@@ -0,0 +1,76 @@
+import AutoencoderGPU from './autoencoder-gpu';
+import { INeuralNetworkTrainOptions } from './neural-network';
+
+const trainingData = [
+  [0, 0, 0],
+  [0, 1, 1],
+  [1, 0, 1],
+  [1, 1, 0],
+];
+
+const xornet = new AutoencoderGPU<number[], number[]>({
+  inputSize: 3,
+  hiddenLayers: [4, 2, 4],
+  outputSize: 3,
+});
+
+const errorThresh = 0.0011;
+
+const trainOptions: Partial<INeuralNetworkTrainOptions> = {
+  errorThresh,
+  iterations: 250000,
+  learningRate: 0.1,
+  log: (details) => console.log(details),
+  // logPeriod: 500,
+  logPeriod: 500,
+};
+
+const result = xornet.train(trainingData, trainOptions);
+
+test('denoise a data sample', async () => {
+  expect(result.error).toBeLessThanOrEqual(errorThresh);
+
+  function xor(...args: number[]) {
+    return Math.round(xornet.denoise(args)[2]);
+  }
+
+  const run1 = xor(0, 0, 0);
+  const run2 = xor(0, 1, 1);
+  const run3 = xor(1, 0, 1);
+  const run4 = xor(1, 1, 0);
+
+  expect(run1).toBe(0);
+  expect(run2).toBe(1);
+  expect(run3).toBe(1);
+  expect(run4).toBe(0);
+});
+
+test('encode and decode a data sample', async () => {
+  expect(result.error).toBeLessThanOrEqual(errorThresh);
+
+  const run1$input = [0, 0, 0];
+  const run1$encoded = xornet.encode(run1$input);
+  const run1$decoded = xornet.decode(run1$encoded);
+
+  const run2$input = [0, 1, 1];
+  const run2$encoded = xornet.encode(run2$input);
+  const run2$decoded = xornet.decode(run2$encoded);
+
+  for (let i = 0; i < 3; i++)
+    expect(Math.round(run1$decoded[i])).toBe(run1$input[i]);
+  for (let i = 0; i < 3; i++)
+    expect(Math.round(run2$decoded[i])).toBe(run2$input[i]);
+});
+
+test('test a data sample for anomalies', async () => {
+  expect(result.error).toBeLessThanOrEqual(errorThresh);
+
+  function includesAnomalies(...args: number[]) {
+    expect(xornet.likelyIncludesAnomalies(args, 0.5)).toBe(false);
+  }
+
+  includesAnomalies(0, 0, 0);
+  includesAnomalies(0, 1, 1);
+  includesAnomalies(1, 0, 1);
+  includesAnomalies(1, 1, 0);
+});
diff --git a/src/autoencoder-gpu.ts b/src/autoencoder-gpu.ts
@@ -0,0 +1,235 @@
+import {
+  IKernelFunctionThis,
+  KernelOutput,
+  Texture,
+  TextureArrayOutput,
+} from 'gpu.js';
+import {
+  IJSONLayer,
+  INeuralNetworkData,
+  INeuralNetworkDatum,
+  INeuralNetworkTrainOptions,
+  NeuralNetworkIO,
+  NeuralNetworkRAM,
+} from './neural-network';
+import {
+  INeuralNetworkGPUOptions,
+  NeuralNetworkGPU,
+} from './neural-network-gpu';
+import { INeuralNetworkState } from './neural-network-types';
+import { UntrainedNeuralNetworkError } from './errors/untrained-neural-network-error';
+
+function loss(
+  this: IKernelFunctionThis,
+  actual: number,
+  expected: number,
+  inputs: NeuralNetworkIO,
+  ram: NeuralNetworkRAM
+) {
+  let error = expected - actual;
+
+  // if ( o ≈ i0 ) then return 3.125% of the loss value.
+  // Otherwise, return 3200% of the full loss value.
+  // eslint-disable-next-line @typescript-eslint/ban-ts-comment
+  // @ts-expect-error
+  if (Math.round(actual) !== Math.round(inputs[this.thread.x])) error *= 32;
+  else error *= 0.03125;
+
+  return error;
+}
+
+/**
+ * An autoencoder learns to compress input data down to relevant features and reconstruct input data from its compressed representation.
+ */
+export class AutoencoderGPU<
+  DecodedData extends INeuralNetworkData,
+  EncodedData extends INeuralNetworkData
+> extends NeuralNetworkGPU<DecodedData, DecodedData> {
+  private decoder?: NeuralNetworkGPU<EncodedData, DecodedData>;
+
+  constructor(options?: Partial<INeuralNetworkGPUOptions>) {
+    // Create default options for the autoencoder.
+    options ??= {};
+
+    const decodedSize = options.inputSize ?? options.outputSize ?? 1;
+
+    // Define the denoiser subnet's input and output sizes.
+    options.inputSize = options.outputSize = decodedSize;
+
+    options.hiddenLayers ??= [Math.round(decodedSize * 0.66)];
+
+    options.loss ??= loss;
+
+    // Create the autoencoder.
+    super(options);
+  }
+
+  /**
+   * Denoise input data, removing any anomalies from the data.
+   * @param {DecodedData} input
+   * @returns {DecodedData}
+   */
+  denoise(input: DecodedData): DecodedData {
+    // Run the input through the generic denoiser.
+    // This isn't the best denoiser implementation, but it's efficient.
+    // Efficiency is important here because training should focus on
+    // optimizing for feature extraction as quickly as possible rather than
+    // denoising and anomaly detection; there are other specialized topologies
+    // better suited for these tasks anyways, many of which can be implemented
+    // by using an autoencoder.
+    return this.run(input);
+  }
+
+  /**
+   * Decode `EncodedData` into an approximation of its original form.
+   *
+   * @param {EncodedData} input
+   * @returns {DecodedData}
+   */
+  decode(input: EncodedData): DecodedData {
+    // If the decoder has not been trained yet, throw an error.
+    if (!this.decoder) throw new UntrainedNeuralNetworkError(this);
+
+    // Decode the encoded input.
+    return this.decoder.run(input);
+  }
+
+  /**
+   * Encode data to extract features, reduce dimensionality, etc.
+   *
+   * @param {DecodedData} input
+   * @returns {EncodedData}
+   */
+  encode(input: DecodedData): EncodedData {
+    // If the decoder has not been trained yet, throw an error.
+    if (!this) throw new UntrainedNeuralNetworkError(this);
+
+    // Process the input.
+    this.run(input);
+
+    // Get the auto-encoded input.
+    let encodedInput: TextureArrayOutput = this
+      .encodedLayer as TextureArrayOutput;
+
+    // If the encoded input is a `Texture`, convert it into an `Array`.
+    if (encodedInput instanceof Texture) encodedInput = encodedInput.toArray();
+    else encodedInput = encodedInput.slice(0);
+
+    // Return the encoded input.
+    return encodedInput as EncodedData;
+  }
+
+  /**
+   * Test whether or not a data sample likely contains anomalies.
+   * If anomalies are likely present in the sample, returns `true`.
+   * Otherwise, returns `false`.
+   *
+   * @param {DecodedData} input
+   * @returns {boolean}
+   */
+  likelyIncludesAnomalies(
+    input: DecodedData,
+    anomalyThreshold: number
+  ): boolean {
+    // Create the anomaly vector.
+    const anomalies: number[] = [];
+
+    // Attempt to denoise the input.
+    const denoised = this.denoise(input);
+
+    // Calculate the anomaly vector.
+    for (let i = 0; i < (input.length ?? 0); i++) {
+      anomalies[i] = Math.abs(
+        (input as number[])[i] - (denoised as number[])[i]
+      );
+    }
+
+    // Calculate the sum of all anomalies within the vector.
+    const sum = anomalies.reduce(
+      (previousValue, value) => previousValue + value
+    );
+
+    // Calculate the mean anomaly.
+    const mean = sum / (input as number[]).length;
+
+    console.log(sum, mean, anomalyThreshold);
+
+    // Return whether or not the mean anomaly rate is greater than the anomaly threshold.
+    return mean > anomalyThreshold;
+  }
+
+  /**
+   * Train the auto encoder.
+   *
+   * @param {DecodedData[]} data
+   * @param {Partial<INeuralNetworkTrainOptions>} options
+   * @returns {INeuralNetworkState}
+   */
+  train(
+    data:
+      | Array<Partial<DecodedData>>
+      | Array<INeuralNetworkDatum<Partial<DecodedData>, Partial<DecodedData>>>,
+    options?: Partial<INeuralNetworkTrainOptions>
+  ): INeuralNetworkState {
+    const preprocessedData: Array<INeuralNetworkDatum<
+      Partial<DecodedData>,
+      Partial<DecodedData>
+    >> = [];
+
+    if (data.length && data.length > 0)
+      for (const datum of data) {
+        preprocessedData.push({
+          input: datum as Partial<DecodedData>,
+          output: datum as Partial<DecodedData>,
+        });
+      }
+
+    const results = super.train(preprocessedData, options);
+
+    this.decoder = this.createDecoder();
+
+    return results;
+  }
+
+  /**
+   * Create a new decoder from the trained denoiser.
+   *
+   * @returns {NeuralNetworkGPU<EncodedData, DecodedData>}
+   */
+  private createDecoder() {
+    const json = this.toJSON();
+
+    const layers: IJSONLayer[] = [];
+    const sizes: number[] = [];
+
+    for (let i = this.encodedLayerIndex; i < this.sizes.length; i++) {
+      layers.push(json.layers[i]);
+      sizes.push(json.sizes[i]);
+    }
+
+    json.layers = layers;
+    json.sizes = sizes;
+
+    json.options.inputSize = json.sizes[0];
+
+    const decoder = new NeuralNetworkGPU().fromJSON(json);
+
+    return (decoder as unknown) as NeuralNetworkGPU<EncodedData, DecodedData>;
+  }
+
+  /**
+   * Get the layer containing the encoded representation.
+   */
+  private get encodedLayer(): KernelOutput {
+    return this.outputs[this.encodedLayerIndex];
+  }
+
+  /**
+   * Get the offset of the encoded layer.
+   */
+  private get encodedLayerIndex(): number {
+    return Math.round(this.outputs.length * 0.5) - 1;
+  }
+}
+
+export default AutoencoderGPU;
diff --git a/src/autoencoder.test.ts b/src/autoencoder.test.ts
@@ -1,4 +1,4 @@
-import AutoencoderGPU from './autoencoder';
+import Autoencoder from './autoencoder';
 import { INeuralNetworkTrainOptions } from './neural-network';
 
 const trainingData = [
@@ -8,7 +8,7 @@ const trainingData = [
   [1, 1, 0],
 ];
 
-const xornet = new AutoencoderGPU<number[], number[]>({
+const xornet = new Autoencoder<number[], number[]>({
   inputSize: 3,
   hiddenLayers: [4, 2, 4],
   outputSize: 3,

diff --git a/src/autoencoder.ts b/src/autoencoder.ts
@@ -11,11 +11,10 @@ import {
   INeuralNetworkTrainOptions,
   NeuralNetworkIO,
   NeuralNetworkRAM,
+  INeuralNetworkOptions,
+  NeuralNetwork,
 } from './neural-network';
-import {
-  INeuralNetworkGPUOptions,
-  NeuralNetworkGPU,
-} from './neural-network-gpu';
+
 import { INeuralNetworkState } from './neural-network-types';
 import { UntrainedNeuralNetworkError } from './errors/untrained-neural-network-error';
 
@@ -41,13 +40,13 @@ function loss(
 /**
  * An autoencoder learns to compress input data down to relevant features and reconstruct input data from its compressed representation.
  */
-export class AutoencoderGPU<
+export class Autoencoder<
   DecodedData extends INeuralNetworkData,
   EncodedData extends INeuralNetworkData
-> extends NeuralNetworkGPU<DecodedData, DecodedData> {
-  private decoder?: NeuralNetworkGPU<EncodedData, DecodedData>;
+> extends NeuralNetwork<DecodedData, DecodedData> {
+  private decoder?: NeuralNetwork<EncodedData, DecodedData>;
 
-  constructor(options?: Partial<INeuralNetworkGPUOptions>) {
+  constructor(options?: Partial<INeuralNetworkOptions>) {
     // Create default options for the autoencoder.
     options ??= {};
 
@@ -194,7 +193,7 @@ export class AutoencoderGPU<
   /**
    * Create a new decoder from the trained denoiser.
    *
-   * @returns {NeuralNetworkGPU<EncodedData, DecodedData>}
+   * @returns {NeuralNetwork<EncodedData, DecodedData>}
    */
   private createDecoder() {
     const json = this.toJSON();
@@ -212,9 +211,9 @@ export class AutoencoderGPU<
 
     json.options.inputSize = json.sizes[0];
 
-    const decoder = new NeuralNetworkGPU().fromJSON(json);
+    const decoder = new NeuralNetwork().fromJSON(json);
 
-    return (decoder as unknown) as NeuralNetworkGPU<EncodedData, DecodedData>;
+    return (decoder as unknown) as NeuralNetwork<EncodedData, DecodedData>;
   }
 
   /**
@@ -232,4 +231,4 @@ export class AutoencoderGPU<
   }
 }
 
-export default AutoencoderGPU;
+export default Autoencoder;
diff --git a/src/index.ts b/src/index.ts
@@ -1,5 +1,5 @@
 import * as activation from './activation';
-import { AutoencoderGPU } from './autoencoder';
+import { AutoencoderGPU } from './autoencoder-gpu';
 import CrossValidate from './cross-validate';
 import { FeedForward } from './feed-forward';
 import * as layer from './layer';