Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(sync): update Encodec.cpp to sync with latest ggml API #45

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ endif()

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
set(CMAKE_CXX_FLAGS_DEBUG "-g -O0")

if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
set(ENCODEC_STANDALONE ON)
Expand All @@ -27,12 +28,16 @@ add_library(
${ENCODEC_LIB} STATIC
encodec.cpp
encodec.h
encoder.cpp
encoder.h
decoder.cpp
decoder.h
quantizer.h
ops.cpp
ops.h
utils.cpp
utils.h
lstm.cpp
lstm.h
)

Expand All @@ -44,6 +49,10 @@ target_link_libraries(${ENCODEC_LIB} PUBLIC ggml)
target_include_directories(${ENCODEC_LIB} PUBLIC .)
target_compile_features(${ENCODEC_LIB} PUBLIC cxx_std_11)

if (CMAKE_BUILD_TYPE STREQUAL "Debug")
target_compile_options(${ENCODEC_LIB} PRIVATE -g -O0)
endif()

if (GGML_CUBLAS)
add_compile_definitions(GGML_USE_CUBLAS)
endif()
Expand Down
134 changes: 134 additions & 0 deletions decoder.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/* Implementation of the decoder part of the Encodec model.

For more details, see the explanation in the encoder.cpp file.
*/

#include <cstdio>
#include <cstring>

#include "ggml.h"

#include "decoder.h"
#include "lstm.h"
#include "ops.h"
#include "utils.h"

const static int DECODER_TOTAL_NUM_NODES = 220;

static struct ggml_tensor *encodec_forward_decoder_step_0(
const struct encodec_decoder * decoder,
struct ggml_context * ctx0,
struct ggml_tensor * quantized_out,
const int stride) {

struct ggml_tensor *inpL = strided_conv_1d(
ctx0, quantized_out, decoder->init_conv_w, decoder->init_conv_b, stride);

return inpL;
}

static struct ggml_tensor *encodec_forward_decoder_step_1(
const struct encodec_decoder * decoder,
struct ggml_context * ctx0,
struct ggml_tensor * inpL,
const int * ratios,
const int stride,
const int kernel_size,
const int res_kernel_size) {

struct ggml_tensor *cur = inpL;

// multi-layer lstm
struct ggml_tensor *out = encodec_lstm(ctx0, cur, decoder->lstm_layers);

inpL = ggml_add(ctx0, inpL, out);

for (int layer_ix = 0; layer_ix < 4; layer_ix++) {
encodec_decoder_block block = decoder->blocks[layer_ix];

// upsampling layers
inpL = ggml_elu(ctx0, inpL);

inpL = strided_conv_transpose_1d(
ctx0, inpL, block.us_conv_w, block.us_conv_b, ratios[layer_ix]);

struct ggml_tensor *current = inpL;

// shortcut
struct ggml_tensor *shortcut = strided_conv_1d(
ctx0, inpL, block.conv_sc_w, block.conv_sc_b, stride);

// conv1
current = ggml_elu(ctx0, current);

current = strided_conv_1d(
ctx0, current, block.conv_1_w, block.conv_1_b, stride);

// conv2
current = ggml_elu(ctx0, current);

current = strided_conv_1d(
ctx0, current, block.conv_2_w, block.conv_2_b, stride);

// residual connection
inpL = ggml_add(ctx0, current, shortcut);
}

// final conv
inpL = ggml_elu(ctx0, inpL);

struct ggml_tensor *decoded_inp = strided_conv_1d(
ctx0, inpL, decoder->final_conv_w, decoder->final_conv_b, stride);

return decoded_inp;
}

struct ggml_tensor *encodec_forward_decoder(
const struct encodec_decoder * decoder,
struct ggml_context * main_ctx,
struct ggml_tensor * quantized_out,
const int * ratios,
const int kernel_size,
const int res_kernel_size,
const int stride) {
// quantized_out lives in ctx0
if (!quantized_out) {
fprintf(stderr, "%s: null input tensor\n", __func__);
return NULL;
}

// setup decoder context
static size_t buf_size = ggml_tensor_overhead() * DECODER_TOTAL_NUM_NODES + ggml_graph_overhead();
buf_size += 1024 * 1024 * 1024; // 1 MB (extra safety margin)

struct ggml_init_params params = {
/* .mem_size = */ buf_size,
/* .mem_buffer = */ NULL,
/* .no_alloc = */ false,
};

struct ggml_context * decoder_ctx = ggml_init(params);
struct ggml_cgraph * gf = ggml_new_graph(decoder_ctx);

// step 0
struct ggml_tensor * inpL = encodec_forward_decoder_step_0(decoder, decoder_ctx, quantized_out, stride);
ggml_set_output(inpL);

ggml_build_forward_expand(gf, inpL);
printf("[decoder] number of nodes: %d\n", ggml_graph_n_nodes(gf));
ggml_graph_compute_with_ctx(decoder_ctx, gf, 4 /* num_threads */);

// step 1
struct ggml_tensor * out = encodec_forward_decoder_step_1(decoder, decoder_ctx, inpL, ratios, stride, kernel_size, res_kernel_size);
ggml_set_output(out);

ggml_build_forward_expand(gf, out);
printf("[decoder] number of nodes: %d\n", ggml_graph_n_nodes(gf));
ggml_graph_compute_with_ctx(decoder_ctx, gf, 4 /* num_threads */);

// copy output to main context
struct ggml_tensor * decoded = ggml_new_tensor_2d(main_ctx, GGML_TYPE_F32, out->ne[0], out->ne[1]);
memcpy(decoded->data, out->data, ggml_nbytes(out));
ggml_set_name(decoded, "decoded");

ggml_free(decoder_ctx);

return decoded;
}
82 changes: 9 additions & 73 deletions decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
#include <vector>

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

#include "lstm.h"
#include "ops.h"
#include "utils.h"


Expand All @@ -32,7 +31,7 @@ struct encodec_decoder {
struct ggml_tensor *init_conv_w;
struct ggml_tensor *init_conv_b;

encodec_lstm lstm;
encodec_lstm_layers lstm_layers;

struct ggml_tensor *final_conv_w;
struct ggml_tensor *final_conv_b;
Expand All @@ -41,73 +40,10 @@ struct encodec_decoder {
};

struct ggml_tensor *encodec_forward_decoder(
const struct encodec_decoder *decoder, struct ggml_allocr *allocr, struct ggml_context *ctx0,
struct ggml_tensor *quantized_out, const int *ratios, const int kernel_size, const int res_kernel_size,
const int stride) {

if (!quantized_out) {
fprintf(stderr, "%s: null input tensor\n", __func__);
return NULL;
}

struct ggml_tensor *inpL = strided_conv_1d(
ctx0, quantized_out, decoder->init_conv_w, decoder->init_conv_b, stride);

// lstm
{
struct ggml_tensor *cur = inpL;

const encodec_lstm lstm = decoder->lstm;

// first lstm layer
struct ggml_tensor *hs1 = forward_pass_lstm_unilayer(
ctx0, allocr, cur, lstm.l0_ih_w, lstm.l0_hh_w,
lstm.l0_ih_b, lstm.l0_hh_b);

// second lstm layer
struct ggml_tensor *out = forward_pass_lstm_unilayer(
ctx0, allocr, hs1, lstm.l1_ih_w, lstm.l1_hh_w,
lstm.l1_ih_b, lstm.l1_hh_b);

inpL = ggml_add(ctx0, inpL, out);
}

for (int layer_ix = 0; layer_ix < 4; layer_ix++) {
encodec_decoder_block block = decoder->blocks[layer_ix];

// upsampling layers
inpL = ggml_elu(ctx0, inpL);

inpL = strided_conv_transpose_1d(
ctx0, inpL, block.us_conv_w, block.us_conv_b, ratios[layer_ix]);

struct ggml_tensor *current = inpL;

// shortcut
struct ggml_tensor *shortcut = strided_conv_1d(
ctx0, inpL, block.conv_sc_w, block.conv_sc_b, stride);

// conv1
current = ggml_elu(ctx0, current);

current = strided_conv_1d(
ctx0, current, block.conv_1_w, block.conv_1_b, stride);

// conv2
current = ggml_elu(ctx0, current);

current = strided_conv_1d(
ctx0, current, block.conv_2_w, block.conv_2_b, stride);

// residual connection
inpL = ggml_add(ctx0, current, shortcut);
}

// final conv
inpL = ggml_elu(ctx0, inpL);

struct ggml_tensor *decoded_inp = strided_conv_1d(
ctx0, inpL, decoder->final_conv_w, decoder->final_conv_b, stride);

return decoded_inp;
}
const struct encodec_decoder * decoder,
struct ggml_context * main_ctx,
struct ggml_tensor * quantized_out,
const int * ratios,
const int kernel_size,
const int res_kernel_size,
const int stride);
Loading
Loading