From ce1e918d50b6b1cf867142aff8058728d5b3bc65 Mon Sep 17 00:00:00 2001 From: Cheng Date: Sat, 7 Dec 2024 11:01:04 +0900 Subject: [PATCH] Update to MLX 0.21.0 --- deps/mlx | 2 +- lib/nn/layers/pooling.ts | 66 +++++++++++----------------------------- src/fast.cc | 17 +---------- 3 files changed, 20 insertions(+), 65 deletions(-) diff --git a/deps/mlx b/deps/mlx index cb431dfc..bb303c45 160000 --- a/deps/mlx +++ b/deps/mlx @@ -1 +1 @@ -Subproject commit cb431dfc9fcd63f5fe0a7b33e8efd19ad7d44d07 +Subproject commit bb303c45a55d7147bc261e9aa8be218d49500d09 diff --git a/lib/nn/layers/pooling.ts b/lib/nn/layers/pooling.ts index 60f2785d..d37cb9c6 100644 --- a/lib/nn/layers/pooling.ts +++ b/lib/nn/layers/pooling.ts @@ -97,12 +97,8 @@ class Pool3d extends Pool { * * @remarks * - * Assuming an input of shape `(N, L, C)` and `kernelSize` is `k`, the output is - * a tensor of shape `(N, L_out, C)`, given by: - * - * `out(N_i, t, C_j) = max_{m=0,...,k-1} input(N_i, stride * t + m, C_j)` - * - * where `L_out = floor((L + 2 * padding - kernelSize) / stride) + 1`. + * Spatially downsamples the input by taking the maximum of a sliding window + * of size `kernelSize` and sliding stride `stride`. * * @param kernelSize - The size of the pooling window kernel. * @param stride - The stride of the pooling window. Default: `kernelSize`. @@ -122,12 +118,8 @@ export class MaxPool1d extends Pool1d { * * @remarks * - * Assuming an input of shape `(N, L, C)` and `kernelSize` is `k`, the output is - * a tensor of shape `(N, L_out, C)`, given by: - * - * `out(N_i, t, C_j) = 1/k * sum_{m=0,...,k-1} input(N_i, stride * t + m, C_j)` - * - * where `L_out = floor((L + 2 * padding - kernelSize) / stride) + 1`. + * Spatially downsamples the input by taking the average of a sliding window + * of size `kernelSize` and sliding stride `stride`. * * @param kernelSize - The size of the pooling window kernel. * @param stride - The stride of the pooling window. Default: `kernelSize`. 
@@ -147,15 +139,11 @@ export class AvgPool1d extends Pool1d { * * @remarks * - * Assuming an input of shape `(N, H, W, C)` and `kernelSize` is `(k_H, k_W)`, - * the output is a tensor of shape `(N, H_out, W_out, C)`, given by: + * Spatially downsamples the input by taking the maximum of a sliding window + * of size `kernelSize` and sliding stride `stride`. * - * `out(N_i, h, w, C_j) = max_{m=0,...,k_H-1} max_{n=0,...,k_W-1} input(N_i, stride[0] * h + m, stride[1] * w + n, C_j)` + * The parameters `kernelSize`, `stride` and `padding` can either be: * - * where `H_out = floor((H + 2 * padding[0] - kernelSize[0]) / stride[0]) + 1` - * `W_out = floor((W + 2 * padding[1] - kernelSize[1]) / stride[1]) + 1` - * - * The parameters `kernelSize`, `stride`, `padding`, can either be: * - a single `number` -- in which case the same value is used for both the * height and width axis; * - a `tuple` of two `numbers`s -- in which case, the first `number` is used @@ -179,16 +167,10 @@ export class MaxPool2d extends Pool2d { * * @remarks * - * Assuming an input of shape `(N, H, W, C)` and `kernelSize` is `(kH, kW)`, - * the output is a tensor of shape `(N, H_out, W_out, C)`, given by: - * - * `out(N_i, h, w, C_j) = 1/(kH*kW) * sum_{m=0,...,kH-1} sum_{n=0,...,kW-1} - * input(N_i, stride[0] * h + m, stride[1] * w + n, C_j)` + * Spatially downsamples the input by taking the average of a sliding window + * of size `kernelSize` and sliding stride `stride`. * - * where `H_out = floor((H + 2 * padding[0] - kernelSize[0]) / stride[0]) + 1`, - * `W_out = floor((W + 2 * padding[1] - kernelSize[1]) / stride[1]) + 1`. 
- * - * The parameters `kernelSize`, `stride`, `padding`, can either be: + * The parameters `kernelSize`, `stride` and `padding` can either be: * * - a single `number` -- in which case the same value is used for both the * height and width axis @@ -213,22 +195,16 @@ export class AvgPool2d extends Pool2d { * * @remarks * - * Assuming an input of shape `(N, D, H, W, C)` and `kernelSize` is `(k_D, k_H, k_W)`, - * the output is a tensor of shape `(N, D_out, H_out, W_out, C)`, given by: - * - * `out(N_i, d, h, w, C_j) = max_{l=0,...,k_D-1} max_{m=0,...,k_H-1} max_{n=0,...,k_W-1} - * input(N_i, stride[0] * d + l, stride[1] * h + m, stride[2] * w + n, C_j)` + * Spatially downsamples the input by taking the maximum of a sliding window + * of size `kernelSize` and sliding stride `stride`. * - * where `D_out = floor((D + 2 * padding[0] - kernelSize[0]) / stride[0]) + 1` - * `H_out = floor((H + 2 * padding[1] - kernelSize[1]) / stride[1]) + 1` - * `W_out = floor((W + 2 * padding[2] - kernelSize[2]) / stride[2]) + 1` + * The parameters `kernelSize`, `stride` and `padding` can either be: * - * The parameters `kernelSize`, `stride`, `padding`, can either be: * - a single `number` -- in which case the same value is used for the depth, * height and width axis; - * - a `tuple` of three `numbers`s -- in which case, the first `number` is used - * for the depth axis, the second `number` for the height axis, and the third - * `number` for the width axis. + * - a `tuple` of three `number`s -- in which case, the first `number` is + * used for the depth axis, the second `number` for the height axis, and the + * third `number` for the width axis. * * @param kernelSize - The size of the pooling window. * @param stride - The stride of the pooling window. Default: `kernelSize`. 
@@ -248,14 +224,8 @@ export class MaxPool3d extends Pool3d { * * @remarks * - * Assuming an input of shape `(N, D, H, W, C)` and `kernelSize` is `(k_D, k_H, k_W)`, - * the output is a tensor of shape `(N, D_out, H_out, W_out, C)`, given by: - * - * `out(N_i, d, h, w, C_j) = (1 / (k_D * k_H * k_W)) * sum_{l=0,...,k_D-1} sum_{m=0,...,k_H-1} sum_{n=0,...,k_W-1} input(N_i, stride[0] * d + l, stride[1] * h + m, stride[2] * w + n, C_j)` - * - * where `D_out = floor((D + 2 * padding[0] - kernelSize[0]) / stride[0]) + 1` - * `H_out = floor((H + 2 * padding[1] - kernelSize[1]) / stride[1]) + 1` - * `W_out = floor((W + 2 * padding[2] - kernelSize[2]) / stride[2]) + 1` + * Spatially downsamples the input by taking the average of a sliding window + * of size `kernelSize` and sliding stride `stride`. * * The parameters `kernelSize`, `stride`, `padding`, can either be: * diff --git a/src/fast.cc b/src/fast.cc index 4db0c8e7..943f8d44 100644 --- a/src/fast.cc +++ b/src/fast.cc @@ -1,20 +1,6 @@ #include "src/array.h" #include "src/stream.h" -namespace fast_ops { - -mx::array AffineQuantize(const mx::array& w, - const mx::array& scales, - const mx::array& biases, - std::optional group_size, - std::optional bits, - mx::StreamOrDevice s) { - return mx::fast::affine_quantize(w, scales, biases, group_size.value_or(64), - bits.value_or(4)); -} - -} // namespace fast_ops - void InitFast(napi_env env, napi_value exports) { napi_value fast = ki::CreateObject(env); ki::Set(env, exports, "fast", fast); @@ -23,6 +9,5 @@ void InitFast(napi_env env, napi_value exports) { "rmsNorm", &mx::fast::rms_norm, "layerNorm", &mx::fast::layer_norm, "rope", &mx::fast::rope, - "scaledDotProductAttention", &mx::fast::scaled_dot_product_attention, - "affineQuantize", &fast_ops::AffineQuantize); + "scaledDotProductAttention", &mx::fast::scaled_dot_product_attention); }