Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(sync): update Encodec.cpp to sync with latest ggml API #45

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ endif()

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
set(CMAKE_CXX_FLAGS_DEBUG "-g -O0")

if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
set(ENCODEC_STANDALONE ON)
Expand All @@ -27,12 +28,16 @@ add_library(
${ENCODEC_LIB} STATIC
encodec.cpp
encodec.h
encoder.cpp
encoder.h
decoder.cpp
decoder.h
quantizer.h
ops.cpp
ops.h
utils.cpp
utils.h
lstm.cpp
lstm.h
)

Expand All @@ -44,6 +49,10 @@ target_link_libraries(${ENCODEC_LIB} PUBLIC ggml)
target_include_directories(${ENCODEC_LIB} PUBLIC .)
target_compile_features(${ENCODEC_LIB} PUBLIC cxx_std_11)

if (CMAKE_BUILD_TYPE STREQUAL "Debug")
target_compile_options(${ENCODEC_LIB} PRIVATE -g -O0)
endif()

if (GGML_CUBLAS)
add_compile_definitions(GGML_USE_CUBLAS)
endif()
Expand Down
134 changes: 134 additions & 0 deletions decoder.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/* Implementation of the decoder part of the Encodec model.

For more details, see the explanation in the encoder.cpp file.
*/

#include <cstdio>
#include <cstring>

#include "ggml.h"

#include "decoder.h"
#include "lstm.h"
#include "ops.h"
#include "utils.h"

const static int DECODER_TOTAL_NUM_NODES = 220;

static struct ggml_tensor *encodec_forward_decoder_step_0(
const struct encodec_decoder * decoder,
struct ggml_context * ctx0,
struct ggml_tensor * quantized_out,
const int stride) {

struct ggml_tensor *inpL = strided_conv_1d(
ctx0, quantized_out, decoder->init_conv_w, decoder->init_conv_b, stride);

return inpL;
}

static struct ggml_tensor *encodec_forward_decoder_step_1(
const struct encodec_decoder * decoder,
struct ggml_context * ctx0,
struct ggml_tensor * inpL,
const int * ratios,
const int stride,
const int kernel_size,
const int res_kernel_size) {

struct ggml_tensor *cur = inpL;

// multi-layer lstm
struct ggml_tensor *out = encodec_lstm(ctx0, cur, decoder->lstm_layers);

inpL = ggml_add(ctx0, inpL, out);

for (int layer_ix = 0; layer_ix < 4; layer_ix++) {
encodec_decoder_block block = decoder->blocks[layer_ix];

// upsampling layers
inpL = ggml_elu(ctx0, inpL);

inpL = strided_conv_transpose_1d(
ctx0, inpL, block.us_conv_w, block.us_conv_b, ratios[layer_ix]);

struct ggml_tensor *current = inpL;

// shortcut
struct ggml_tensor *shortcut = strided_conv_1d(
ctx0, inpL, block.conv_sc_w, block.conv_sc_b, stride);

// conv1
current = ggml_elu(ctx0, current);

current = strided_conv_1d(
ctx0, current, block.conv_1_w, block.conv_1_b, stride);

// conv2
current = ggml_elu(ctx0, current);

current = strided_conv_1d(
ctx0, current, block.conv_2_w, block.conv_2_b, stride);

// residual connection
inpL = ggml_add(ctx0, current, shortcut);
}

// final conv
inpL = ggml_elu(ctx0, inpL);

struct ggml_tensor *decoded_inp = strided_conv_1d(
ctx0, inpL, decoder->final_conv_w, decoder->final_conv_b, stride);

return decoded_inp;
}

struct ggml_tensor *encodec_forward_decoder(
const struct encodec_decoder * decoder,
struct ggml_context * main_ctx,
struct ggml_tensor * quantized_out,
const int * ratios,
const int kernel_size,
const int res_kernel_size,
const int stride) {
// quantized_out lives in ctx0
if (!quantized_out) {
fprintf(stderr, "%s: null input tensor\n", __func__);
return NULL;
}

// setup decoder context
static size_t buf_size = ggml_tensor_overhead() * DECODER_TOTAL_NUM_NODES + ggml_graph_overhead();
buf_size += 1024 * 1024 * 1024; // 1 MB (extra safety margin)

struct ggml_init_params params = {
/* .mem_size = */ buf_size,
/* .mem_buffer = */ NULL,
/* .no_alloc = */ false,
};

struct ggml_context * decoder_ctx = ggml_init(params);
struct ggml_cgraph * gf = ggml_new_graph(decoder_ctx);

// step 0
struct ggml_tensor * inpL = encodec_forward_decoder_step_0(decoder, decoder_ctx, quantized_out, stride);
ggml_set_output(inpL);

ggml_build_forward_expand(gf, inpL);
printf("[decoder] number of nodes: %d\n", ggml_graph_n_nodes(gf));
ggml_graph_compute_with_ctx(decoder_ctx, gf, 4 /* num_threads */);

// step 1
struct ggml_tensor * out = encodec_forward_decoder_step_1(decoder, decoder_ctx, inpL, ratios, stride, kernel_size, res_kernel_size);
ggml_set_output(out);

ggml_build_forward_expand(gf, out);
printf("[decoder] number of nodes: %d\n", ggml_graph_n_nodes(gf));
ggml_graph_compute_with_ctx(decoder_ctx, gf, 4 /* num_threads */);

// copy output to main context
struct ggml_tensor * decoded = ggml_new_tensor_2d(main_ctx, GGML_TYPE_F32, out->ne[0], out->ne[1]);
memcpy(decoded->data, out->data, ggml_nbytes(out));
ggml_set_name(decoded, "decoded");

ggml_free(decoder_ctx);

return decoded;
}
82 changes: 9 additions & 73 deletions decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
#include <vector>

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

#include "lstm.h"
#include "ops.h"
#include "utils.h"


Expand All @@ -32,7 +31,7 @@ struct encodec_decoder {
struct ggml_tensor *init_conv_w;
struct ggml_tensor *init_conv_b;

encodec_lstm lstm;
encodec_lstm_layers lstm_layers;

struct ggml_tensor *final_conv_w;
struct ggml_tensor *final_conv_b;
Expand All @@ -41,73 +40,10 @@ struct encodec_decoder {
};

struct ggml_tensor *encodec_forward_decoder(
const struct encodec_decoder *decoder, struct ggml_allocr *allocr, struct ggml_context *ctx0,
struct ggml_tensor *quantized_out, const int *ratios, const int kernel_size, const int res_kernel_size,
const int stride) {

if (!quantized_out) {
fprintf(stderr, "%s: null input tensor\n", __func__);
return NULL;
}

struct ggml_tensor *inpL = strided_conv_1d(
ctx0, quantized_out, decoder->init_conv_w, decoder->init_conv_b, stride);

// lstm
{
struct ggml_tensor *cur = inpL;

const encodec_lstm lstm = decoder->lstm;

// first lstm layer
struct ggml_tensor *hs1 = forward_pass_lstm_unilayer(
ctx0, allocr, cur, lstm.l0_ih_w, lstm.l0_hh_w,
lstm.l0_ih_b, lstm.l0_hh_b);

// second lstm layer
struct ggml_tensor *out = forward_pass_lstm_unilayer(
ctx0, allocr, hs1, lstm.l1_ih_w, lstm.l1_hh_w,
lstm.l1_ih_b, lstm.l1_hh_b);

inpL = ggml_add(ctx0, inpL, out);
}

for (int layer_ix = 0; layer_ix < 4; layer_ix++) {
encodec_decoder_block block = decoder->blocks[layer_ix];

// upsampling layers
inpL = ggml_elu(ctx0, inpL);

inpL = strided_conv_transpose_1d(
ctx0, inpL, block.us_conv_w, block.us_conv_b, ratios[layer_ix]);

struct ggml_tensor *current = inpL;

// shortcut
struct ggml_tensor *shortcut = strided_conv_1d(
ctx0, inpL, block.conv_sc_w, block.conv_sc_b, stride);

// conv1
current = ggml_elu(ctx0, current);

current = strided_conv_1d(
ctx0, current, block.conv_1_w, block.conv_1_b, stride);

// conv2
current = ggml_elu(ctx0, current);

current = strided_conv_1d(
ctx0, current, block.conv_2_w, block.conv_2_b, stride);

// residual connection
inpL = ggml_add(ctx0, current, shortcut);
}

// final conv
inpL = ggml_elu(ctx0, inpL);

struct ggml_tensor *decoded_inp = strided_conv_1d(
ctx0, inpL, decoder->final_conv_w, decoder->final_conv_b, stride);

return decoded_inp;
}
const struct encodec_decoder * decoder,
struct ggml_context * main_ctx,
struct ggml_tensor * quantized_out,
const int * ratios,
const int kernel_size,
const int res_kernel_size,
const int stride);
Loading
Loading