From 3a49447ab4df9ee5c6468a65da4175ab572f6712 Mon Sep 17 00:00:00 2001 From: Jhen Date: Wed, 25 Oct 2023 17:49:10 +0800 Subject: [PATCH] fix: add prefix for gguf_ defs to avoid conflit with whisper.rn --- cpp/ggml.c | 674 +++++++++++++++++++++---------------------- cpp/ggml.h | 160 +++++----- cpp/llama.cpp | 160 +++++----- scripts/bootstrap.sh | 4 + 4 files changed, 501 insertions(+), 497 deletions(-) diff --git a/cpp/ggml.c b/cpp/ggml.c index 4b783a8..d822a32 100644 --- a/cpp/ggml.c +++ b/cpp/ggml.c @@ -20775,46 +20775,46 @@ size_t lm_ggml_quantize_chunk(enum lm_ggml_type type, const float * src, void * //////////////////////////////////////////////////////////////////////////////// -struct gguf_str { +struct lm_gguf_str { uint64_t n; // GGUFv2 char * data; }; -static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = { - [GGUF_TYPE_UINT8] = sizeof(uint8_t), - [GGUF_TYPE_INT8] = sizeof(int8_t), - [GGUF_TYPE_UINT16] = sizeof(uint16_t), - [GGUF_TYPE_INT16] = sizeof(int16_t), - [GGUF_TYPE_UINT32] = sizeof(uint32_t), - [GGUF_TYPE_INT32] = sizeof(int32_t), - [GGUF_TYPE_FLOAT32] = sizeof(float), - [GGUF_TYPE_BOOL] = sizeof(bool), - [GGUF_TYPE_STRING] = sizeof(struct gguf_str), - [GGUF_TYPE_UINT64] = sizeof(uint64_t), - [GGUF_TYPE_INT64] = sizeof(int64_t), - [GGUF_TYPE_FLOAT64] = sizeof(double), - [GGUF_TYPE_ARRAY] = 0, // undefined +static const size_t LM_GGUF_TYPE_SIZE[LM_GGUF_TYPE_COUNT] = { + [LM_GGUF_TYPE_UINT8] = sizeof(uint8_t), + [LM_GGUF_TYPE_INT8] = sizeof(int8_t), + [LM_GGUF_TYPE_UINT16] = sizeof(uint16_t), + [LM_GGUF_TYPE_INT16] = sizeof(int16_t), + [LM_GGUF_TYPE_UINT32] = sizeof(uint32_t), + [LM_GGUF_TYPE_INT32] = sizeof(int32_t), + [LM_GGUF_TYPE_FLOAT32] = sizeof(float), + [LM_GGUF_TYPE_BOOL] = sizeof(bool), + [LM_GGUF_TYPE_STRING] = sizeof(struct lm_gguf_str), + [LM_GGUF_TYPE_UINT64] = sizeof(uint64_t), + [LM_GGUF_TYPE_INT64] = sizeof(int64_t), + [LM_GGUF_TYPE_FLOAT64] = sizeof(double), + [LM_GGUF_TYPE_ARRAY] = 0, // undefined }; -static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13"); - -static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = { - [GGUF_TYPE_UINT8] = "u8", - [GGUF_TYPE_INT8] = "i8", - [GGUF_TYPE_UINT16] = "u16", - [GGUF_TYPE_INT16] = "i16", - [GGUF_TYPE_UINT32] = "u32", - [GGUF_TYPE_INT32] = "i32", - [GGUF_TYPE_FLOAT32] = "f32", - [GGUF_TYPE_BOOL] = "bool", - [GGUF_TYPE_STRING] = "str", - [GGUF_TYPE_ARRAY] = "arr", - [GGUF_TYPE_UINT64] = "u64", - [GGUF_TYPE_INT64] = "i64", - [GGUF_TYPE_FLOAT64] = "f64", +static_assert(LM_GGUF_TYPE_COUNT == 13, "LM_GGUF_TYPE_COUNT != 13"); + +static const char * LM_GGUF_TYPE_NAME[LM_GGUF_TYPE_COUNT] = { + [LM_GGUF_TYPE_UINT8] = "u8", + [LM_GGUF_TYPE_INT8] = "i8", + [LM_GGUF_TYPE_UINT16] = "u16", + [LM_GGUF_TYPE_INT16] = "i16", + [LM_GGUF_TYPE_UINT32] = "u32", + [LM_GGUF_TYPE_INT32] = "i32", + [LM_GGUF_TYPE_FLOAT32] = "f32", + [LM_GGUF_TYPE_BOOL] = "bool", + [LM_GGUF_TYPE_STRING] = "str", + [LM_GGUF_TYPE_ARRAY] = "arr", + [LM_GGUF_TYPE_UINT64] = "u64", + [LM_GGUF_TYPE_INT64] = "i64", + [LM_GGUF_TYPE_FLOAT64] = "f64", }; -static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13"); +static_assert(LM_GGUF_TYPE_COUNT == 13, "LM_GGUF_TYPE_COUNT != 13"); -union gguf_value { +union lm_gguf_value { uint8_t uint8; int8_t int8; uint16_t uint16; @@ -20827,32 +20827,32 @@ union gguf_value { double float64; bool bool_; - struct gguf_str str; + struct lm_gguf_str str; struct { - enum gguf_type type; + enum lm_gguf_type type; uint64_t n; // GGUFv2 void * data; } arr; }; -struct gguf_kv { - struct gguf_str key; +struct lm_gguf_kv { + struct lm_gguf_str key; - enum gguf_type type; - union gguf_value value; + enum lm_gguf_type type; + union lm_gguf_value value; }; -struct gguf_header { +struct lm_gguf_header { char magic[4]; uint32_t version; uint64_t n_tensors; // GGUFv2 uint64_t n_kv; // GGUFv2 }; -struct gguf_tensor_info { - struct gguf_str name; +struct lm_gguf_tensor_info { + struct lm_gguf_str name; uint32_t n_dims; uint64_t ne[LM_GGML_MAX_DIMS]; @@ -20866,11 +20866,11 @@ struct gguf_tensor_info { size_t size; }; -struct gguf_context { - struct gguf_header header; +struct lm_gguf_context { + struct lm_gguf_header header; - struct gguf_kv * kv; - struct gguf_tensor_info * infos; + struct lm_gguf_kv * kv; + struct lm_gguf_tensor_info * infos; size_t alignment; size_t offset; // offset of `data` from beginning of file @@ -20880,50 +20880,50 @@ struct gguf_context { void * data; }; -static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) { +static bool lm_gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) { const size_t n = fread(dst, 1, size, file); *offset += n; return n == size; } // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023 -static bool gguf_fread_str_cur(FILE * file, struct gguf_str * p, size_t * offset) { +static bool lm_gguf_fread_str_cur(FILE * file, struct lm_gguf_str * p, size_t * offset) { p->n = 0; p->data = NULL; bool ok = true; - ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset); p->data = calloc(p->n + 1, 1); - ok = ok && gguf_fread_el(file, p->data, p->n, offset); + ok = ok && lm_gguf_fread_el(file, &p->n, sizeof(p->n), offset); p->data = calloc(p->n + 1, 1); + ok = ok && lm_gguf_fread_el(file, p->data, p->n, offset); return ok; } -static bool gguf_fread_str_v1(FILE * file, struct gguf_str * p, size_t * offset) { +static bool lm_gguf_fread_str_v1(FILE * file, struct lm_gguf_str * p, size_t * offset) { p->n = 0; p->data = NULL; bool ok = true; uint32_t n = 0; - ok = ok && gguf_fread_el(file, &n, sizeof(n), offset); p->data = calloc(n + 1, 1); p->n = n; - ok = ok && gguf_fread_el(file, p->data, p->n, offset); + ok = ok && lm_gguf_fread_el(file, &n, sizeof(n), offset); p->data = calloc(n + 1, 1); p->n = n; + ok = ok && lm_gguf_fread_el(file, p->data, p->n, offset); return ok; } -struct gguf_context * gguf_init_empty(void) { - struct gguf_context * ctx = LM_GGML_ALIGNED_MALLOC(sizeof(struct gguf_context)); +struct lm_gguf_context * lm_gguf_init_empty(void) { + struct lm_gguf_context * ctx = LM_GGML_ALIGNED_MALLOC(sizeof(struct lm_gguf_context)); - memcpy(ctx->header.magic, GGUF_MAGIC, sizeof(ctx->header.magic)); - ctx->header.version = GGUF_VERSION; + memcpy(ctx->header.magic, LM_GGUF_MAGIC, sizeof(ctx->header.magic)); + ctx->header.version = LM_GGUF_VERSION; ctx->header.n_tensors = 0; ctx->header.n_kv = 0; ctx->kv = NULL; ctx->infos = NULL; - ctx->alignment = GGUF_DEFAULT_ALIGNMENT; + ctx->alignment = LM_GGUF_DEFAULT_ALIGNMENT; ctx->offset = 0; ctx->size = 0; @@ -20932,7 +20932,7 @@ struct gguf_context * gguf_init_empty(void) { return ctx; } -struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) { +struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gguf_init_params params) { FILE * file = fopen(fname, "rb"); if (!file) { return NULL; @@ -20945,10 +20945,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p // check the magic before making allocations { - gguf_fread_el(file, &magic, sizeof(magic), &offset); + lm_gguf_fread_el(file, &magic, sizeof(magic), &offset); for (uint32_t i = 0; i < sizeof(magic); i++) { - if (magic[i] != GGUF_MAGIC[i]) { + if (magic[i] != LM_GGUF_MAGIC[i]) { fprintf(stderr, "%s: invalid magic characters %s.\n", __func__, magic); fclose(file); return NULL; @@ -20958,7 +20958,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p bool ok = true; - struct gguf_context * ctx = LM_GGML_ALIGNED_MALLOC(sizeof(struct gguf_context)); + struct lm_gguf_context * ctx = LM_GGML_ALIGNED_MALLOC(sizeof(struct lm_gguf_context)); // read the header { @@ -20969,105 +20969,105 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p ctx->infos = NULL; ctx->data = NULL; - ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset); + ok = ok && lm_gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset); if (ctx->header.version == 1) { // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023 uint32_t n_tensors = 0; uint32_t n_kv = 0; - ok = ok && gguf_fread_el(file, &n_tensors, sizeof(n_tensors), &offset); - ok = ok && gguf_fread_el(file, &n_kv, sizeof(n_kv), &offset); + ok = ok && lm_gguf_fread_el(file, &n_tensors, sizeof(n_tensors), &offset); + ok = ok && lm_gguf_fread_el(file, &n_kv, sizeof(n_kv), &offset); ctx->header.n_tensors = n_tensors; ctx->header.n_kv = n_kv; } else { - ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset); - ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset); + ok = ok && lm_gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset); + ok = ok && lm_gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset); } if (!ok) { fprintf(stderr, "%s: failed to read header\n", __func__); fclose(file); - gguf_free(ctx); + lm_gguf_free(ctx); return NULL; } } // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023 - bool (* gguf_fread_str)(FILE *, struct gguf_str *, size_t *) = gguf_fread_str_cur; + bool (* lm_gguf_fread_str)(FILE *, struct lm_gguf_str *, size_t *) = lm_gguf_fread_str_cur; if (ctx->header.version == 1) { - gguf_fread_str = gguf_fread_str_v1; + lm_gguf_fread_str = lm_gguf_fread_str_v1; } // read the kv pairs { - ctx->kv = malloc(ctx->header.n_kv * sizeof(struct gguf_kv)); + ctx->kv = malloc(ctx->header.n_kv * sizeof(struct lm_gguf_kv)); for (uint32_t i = 0; i < ctx->header.n_kv; ++i) { - struct gguf_kv * kv = &ctx->kv[i]; + struct lm_gguf_kv * kv = &ctx->kv[i]; //fprintf(stderr, "%s: reading kv %d\n", __func__, i); - ok = ok && gguf_fread_str(file, &kv->key, &offset); - ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset); + ok = ok && lm_gguf_fread_str(file, &kv->key, &offset); + ok = ok && lm_gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset); //fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data); switch (kv->type) { - case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (file, &kv->value.uint8, sizeof(kv->value.uint8), &offset); break; - case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (file, &kv->value.int8, sizeof(kv->value.int8), &offset); break; - case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (file, &kv->value.uint16, sizeof(kv->value.uint16), &offset); break; - case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (file, &kv->value.int16, sizeof(kv->value.int16), &offset); break; - case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break; - case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break; - case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break; - case GGUF_TYPE_UINT64: ok = ok && gguf_fread_el (file, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break; - case GGUF_TYPE_INT64: ok = ok && gguf_fread_el (file, &kv->value.int64, sizeof(kv->value.int64), &offset); break; - case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break; - case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break; - case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break; - case GGUF_TYPE_ARRAY: + case LM_GGUF_TYPE_UINT8: ok = ok && lm_gguf_fread_el (file, &kv->value.uint8, sizeof(kv->value.uint8), &offset); break; + case LM_GGUF_TYPE_INT8: ok = ok && lm_gguf_fread_el (file, &kv->value.int8, sizeof(kv->value.int8), &offset); break; + case LM_GGUF_TYPE_UINT16: ok = ok && lm_gguf_fread_el (file, &kv->value.uint16, sizeof(kv->value.uint16), &offset); break; + case LM_GGUF_TYPE_INT16: ok = ok && lm_gguf_fread_el (file, &kv->value.int16, sizeof(kv->value.int16), &offset); break; + case LM_GGUF_TYPE_UINT32: ok = ok && lm_gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break; + case LM_GGUF_TYPE_INT32: ok = ok && lm_gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break; + case LM_GGUF_TYPE_FLOAT32: ok = ok && lm_gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break; + case LM_GGUF_TYPE_UINT64: ok = ok && lm_gguf_fread_el (file, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break; + case LM_GGUF_TYPE_INT64: ok = ok && lm_gguf_fread_el (file, &kv->value.int64, sizeof(kv->value.int64), &offset); break; + case LM_GGUF_TYPE_FLOAT64: ok = ok && lm_gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break; + case LM_GGUF_TYPE_BOOL: ok = ok && lm_gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break; + case LM_GGUF_TYPE_STRING: ok = ok && lm_gguf_fread_str(file, &kv->value.str, &offset); break; + case LM_GGUF_TYPE_ARRAY: { - ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset); + ok = ok && lm_gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset); if (ctx->header.version == 1) { // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023 uint32_t n = 0; - ok = ok && gguf_fread_el(file, &n, sizeof(n), &offset); + ok = ok && lm_gguf_fread_el(file, &n, sizeof(n), &offset); kv->value.arr.n = n; } else { - ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset); + ok = ok && lm_gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset); } switch (kv->value.arr.type) { - case GGUF_TYPE_UINT8: - case GGUF_TYPE_INT8: - case GGUF_TYPE_UINT16: - case GGUF_TYPE_INT16: - case GGUF_TYPE_UINT32: - case GGUF_TYPE_INT32: - case GGUF_TYPE_FLOAT32: - case GGUF_TYPE_UINT64: - case GGUF_TYPE_INT64: - case GGUF_TYPE_FLOAT64: - case GGUF_TYPE_BOOL: + case LM_GGUF_TYPE_UINT8: + case LM_GGUF_TYPE_INT8: + case LM_GGUF_TYPE_UINT16: + case LM_GGUF_TYPE_INT16: + case LM_GGUF_TYPE_UINT32: + case LM_GGUF_TYPE_INT32: + case LM_GGUF_TYPE_FLOAT32: + case LM_GGUF_TYPE_UINT64: + case LM_GGUF_TYPE_INT64: + case LM_GGUF_TYPE_FLOAT64: + case LM_GGUF_TYPE_BOOL: { - kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]); - ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], &offset); + kv->value.arr.data = malloc(kv->value.arr.n * LM_GGUF_TYPE_SIZE[kv->value.arr.type]); + ok = ok && lm_gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * LM_GGUF_TYPE_SIZE[kv->value.arr.type], &offset); } break; - case GGUF_TYPE_STRING: + case LM_GGUF_TYPE_STRING: { - kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str)); + kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct lm_gguf_str)); for (uint32_t j = 0; j < kv->value.arr.n; ++j) { - ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset); + ok = ok && lm_gguf_fread_str(file, &((struct lm_gguf_str *) kv->value.arr.data)[j], &offset); } } break; - case GGUF_TYPE_ARRAY: - case GGUF_TYPE_COUNT: LM_GGML_ASSERT(false && "invalid type"); break; + case LM_GGUF_TYPE_ARRAY: + case LM_GGUF_TYPE_COUNT: LM_GGML_ASSERT(false && "invalid type"); break; } } break; - case GGUF_TYPE_COUNT: LM_GGML_ASSERT(false && "invalid type"); + case LM_GGUF_TYPE_COUNT: LM_GGML_ASSERT(false && "invalid type"); } if (!ok) { @@ -21078,51 +21078,51 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p if (!ok) { fprintf(stderr, "%s: failed to read key-value pairs\n", __func__); fclose(file); - gguf_free(ctx); + lm_gguf_free(ctx); return NULL; } } // read the tensor infos { - ctx->infos = malloc(ctx->header.n_tensors * sizeof(struct gguf_tensor_info)); + ctx->infos = malloc(ctx->header.n_tensors * sizeof(struct lm_gguf_tensor_info)); for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { - struct gguf_tensor_info * info = &ctx->infos[i]; + struct lm_gguf_tensor_info * info = &ctx->infos[i]; for (int j = 0; j < LM_GGML_MAX_DIMS; ++j) { info->ne[j] = 1; } - ok = ok && gguf_fread_str(file, &info->name, &offset); - ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset); + ok = ok && lm_gguf_fread_str(file, &info->name, &offset); + ok = ok && lm_gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset); for (uint32_t j = 0; j < info->n_dims; ++j) { if (ctx->header.version == 1) { // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023 uint32_t t = 0; - ok = ok && gguf_fread_el(file, &t, sizeof(t), &offset); + ok = ok && lm_gguf_fread_el(file, &t, sizeof(t), &offset); info->ne[j] = t; } else { - ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset); + ok = ok && lm_gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset); } } - ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset); - ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset); + ok = ok && lm_gguf_fread_el (file, &info->type, sizeof(info->type), &offset); + ok = ok && lm_gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset); if (!ok) { fprintf(stderr, "%s: failed to read tensor info\n", __func__); fclose(file); - gguf_free(ctx); + lm_gguf_free(ctx); return NULL; } } } - ctx->alignment = GGUF_DEFAULT_ALIGNMENT; + ctx->alignment = LM_GGUF_DEFAULT_ALIGNMENT; - int alignment_idx = gguf_find_key(ctx, "general.alignment"); + int alignment_idx = lm_gguf_find_key(ctx, "general.alignment"); if (alignment_idx != -1) { - ctx->alignment = gguf_get_val_u32(ctx, alignment_idx); + ctx->alignment = lm_gguf_get_val_u32(ctx, alignment_idx); } // we require the data section to be aligned, so take into account any padding @@ -21142,7 +21142,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p { ctx->size = 0; for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { - struct gguf_tensor_info * info = &ctx->infos[i]; + struct lm_gguf_tensor_info * info = &ctx->infos[i]; const int64_t ne = (int64_t) info->ne[0] * @@ -21154,7 +21154,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p fprintf(stderr, "%s: tensor '%s' number of elements (%" PRId64 ") is not a multiple of block size (%d)\n", __func__, info->name.data, ne, lm_ggml_blck_size(info->type)); fclose(file); - gguf_free(ctx); + lm_gguf_free(ctx); return NULL; } @@ -21166,7 +21166,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p // load the tensor data only if requested if (params.ctx != NULL) { - // if the provided gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob + // if the provided lm_gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob // otherwise, we load the binary blob into the created lm_ggml_context as well, and point the "data" members of // the lm_ggml_tensor structs to the appropriate locations in the binary blob @@ -21194,13 +21194,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p ok = ok && data != NULL; // read the binary blob with the tensor data - ok = ok && gguf_fread_el(file, data->data, ctx->size, &offset); + ok = ok && lm_gguf_fread_el(file, data->data, ctx->size, &offset); if (!ok) { fprintf(stderr, "%s: failed to read tensor data\n", __func__); fclose(file); lm_ggml_free(ctx_data); - gguf_free(ctx); + lm_gguf_free(ctx); return NULL; } @@ -21239,7 +21239,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p fprintf(stderr, "%s: failed to read the tensor data\n", __func__); fclose(file); lm_ggml_free(ctx_data); - gguf_free(ctx); + lm_gguf_free(ctx); return NULL; } @@ -21251,7 +21251,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p return ctx; } -void gguf_free(struct gguf_context * ctx) { +void lm_gguf_free(struct lm_gguf_context * ctx) { if (ctx == NULL) { return; } @@ -21259,23 +21259,23 @@ void gguf_free(struct gguf_context * ctx) { if (ctx->kv) { // free string memory - not great.. for (uint32_t i = 0; i < ctx->header.n_kv; ++i) { - struct gguf_kv * kv = &ctx->kv[i]; + struct lm_gguf_kv * kv = &ctx->kv[i]; if (kv->key.data) { free(kv->key.data); } - if (kv->type == GGUF_TYPE_STRING) { + if (kv->type == LM_GGUF_TYPE_STRING) { if (kv->value.str.data) { free(kv->value.str.data); } } - if (kv->type == GGUF_TYPE_ARRAY) { + if (kv->type == LM_GGUF_TYPE_ARRAY) { if (kv->value.arr.data) { - if (kv->value.arr.type == GGUF_TYPE_STRING) { + if (kv->value.arr.type == LM_GGUF_TYPE_STRING) { for (uint32_t j = 0; j < kv->value.arr.n; ++j) { - struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j]; + struct lm_gguf_str * str = &((struct lm_gguf_str *) kv->value.arr.data)[j]; if (str->data) { free(str->data); } @@ -21291,7 +21291,7 @@ void gguf_free(struct gguf_context * ctx) { if (ctx->infos) { for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { - struct gguf_tensor_info * info = &ctx->infos[i]; + struct lm_gguf_tensor_info * info = &ctx->infos[i]; if (info->name.data) { free(info->name.data); @@ -21304,38 +21304,38 @@ void gguf_free(struct gguf_context * ctx) { LM_GGML_ALIGNED_FREE(ctx); } -const char * gguf_type_name(enum gguf_type type) { - return GGUF_TYPE_NAME[type]; +const char * lm_gguf_type_name(enum lm_gguf_type type) { + return LM_GGUF_TYPE_NAME[type]; } -int gguf_get_version(const struct gguf_context * ctx) { +int lm_gguf_get_version(const struct lm_gguf_context * ctx) { return ctx->header.version; } -size_t gguf_get_alignment(const struct gguf_context * ctx) { +size_t lm_gguf_get_alignment(const struct lm_gguf_context * ctx) { return ctx->alignment; } -size_t gguf_get_data_offset(const struct gguf_context * ctx) { +size_t lm_gguf_get_data_offset(const struct lm_gguf_context * ctx) { return ctx->offset; } -void * gguf_get_data(const struct gguf_context * ctx) { +void * lm_gguf_get_data(const struct lm_gguf_context * ctx) { return ctx->data; } -int gguf_get_n_kv(const struct gguf_context * ctx) { +int lm_gguf_get_n_kv(const struct lm_gguf_context * ctx) { return ctx->header.n_kv; } -int gguf_find_key(const struct gguf_context * ctx, const char * key) { +int lm_gguf_find_key(const struct lm_gguf_context * ctx, const char * key) { // return -1 if key not found int keyfound = -1; - const int n_kv = gguf_get_n_kv(ctx); + const int n_kv = lm_gguf_get_n_kv(ctx); for (int i = 0; i < n_kv; ++i) { - if (strcmp(key, gguf_get_key(ctx, i)) == 0) { + if (strcmp(key, lm_gguf_get_key(ctx, i)) == 0) { keyfound = i; break; } @@ -21344,108 +21344,108 @@ int gguf_find_key(const struct gguf_context * ctx, const char * key) { return keyfound; } -const char * gguf_get_key(const struct gguf_context * ctx, int key_id) { +const char * lm_gguf_get_key(const struct lm_gguf_context * ctx, int key_id) { return ctx->kv[key_id].key.data; } -enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int key_id) { +enum lm_gguf_type lm_gguf_get_kv_type(const struct lm_gguf_context * ctx, int key_id) { return ctx->kv[key_id].type; } -enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id) { - LM_GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY); +enum lm_gguf_type lm_gguf_get_arr_type(const struct lm_gguf_context * ctx, int key_id) { + LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_ARRAY); return ctx->kv[key_id].value.arr.type; } -const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id) { - LM_GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY); +const void * lm_gguf_get_arr_data(const struct lm_gguf_context * ctx, int key_id) { + LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_ARRAY); return ctx->kv[key_id].value.arr.data; } -const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) { - LM_GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY); - struct gguf_kv * kv = &ctx->kv[key_id]; - struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i]; +const char * lm_gguf_get_arr_str(const struct lm_gguf_context * ctx, int key_id, int i) { + LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_ARRAY); + struct lm_gguf_kv * kv = &ctx->kv[key_id]; + struct lm_gguf_str * str = &((struct lm_gguf_str *) kv->value.arr.data)[i]; return str->data; } -int gguf_get_arr_n(const struct gguf_context * ctx, int key_id) { - LM_GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY); +int lm_gguf_get_arr_n(const struct lm_gguf_context * ctx, int key_id) { + LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_ARRAY); return ctx->kv[key_id].value.arr.n; } -uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int key_id) { - LM_GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT8); +uint8_t lm_gguf_get_val_u8(const struct lm_gguf_context * ctx, int key_id) { + LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_UINT8); return ctx->kv[key_id].value.uint8; } -int8_t gguf_get_val_i8(const struct gguf_context * ctx, int key_id) { - LM_GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT8); +int8_t lm_gguf_get_val_i8(const struct lm_gguf_context * ctx, int key_id) { + LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_INT8); return ctx->kv[key_id].value.int8; } -uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int key_id) { - LM_GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT16); +uint16_t lm_gguf_get_val_u16(const struct lm_gguf_context * ctx, int key_id) { + LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_UINT16); return ctx->kv[key_id].value.uint16; } -int16_t gguf_get_val_i16(const struct gguf_context * ctx, int key_id) { - LM_GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT16); +int16_t lm_gguf_get_val_i16(const struct lm_gguf_context * ctx, int key_id) { + LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_INT16); return ctx->kv[key_id].value.int16; } -uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int key_id) { - LM_GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT32); +uint32_t lm_gguf_get_val_u32(const struct lm_gguf_context * ctx, int key_id) { + LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_UINT32); return ctx->kv[key_id].value.uint32; } -int32_t gguf_get_val_i32(const struct gguf_context * ctx, int key_id) { - LM_GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT32); +int32_t lm_gguf_get_val_i32(const struct lm_gguf_context * ctx, int key_id) { + LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_INT32); return ctx->kv[key_id].value.int32; } -float gguf_get_val_f32(const struct gguf_context * ctx, int key_id) { - LM_GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT32); +float lm_gguf_get_val_f32(const struct lm_gguf_context * ctx, int key_id) { + LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_FLOAT32); return ctx->kv[key_id].value.float32; } -uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int key_id) { - LM_GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT64); +uint64_t lm_gguf_get_val_u64(const struct lm_gguf_context * ctx, int key_id) { + LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_UINT64); return ctx->kv[key_id].value.uint64; } -int64_t gguf_get_val_i64(const struct gguf_context * ctx, int key_id) { - LM_GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT64); +int64_t lm_gguf_get_val_i64(const struct lm_gguf_context * ctx, int key_id) { + LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_INT64); return ctx->kv[key_id].value.int64; } -double gguf_get_val_f64(const struct gguf_context * ctx, int key_id) { - LM_GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT64); +double lm_gguf_get_val_f64(const struct lm_gguf_context * ctx, int key_id) { + LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_FLOAT64); return ctx->kv[key_id].value.float64; } -bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id) { - LM_GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_BOOL); +bool lm_gguf_get_val_bool(const struct lm_gguf_context * ctx, int key_id) { + LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_BOOL); return ctx->kv[key_id].value.bool_; } -const char * gguf_get_val_str(const struct gguf_context * ctx, int key_id) { - LM_GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_STRING); +const char * lm_gguf_get_val_str(const struct lm_gguf_context * ctx, int key_id) { + LM_GGML_ASSERT(ctx->kv[key_id].type == LM_GGUF_TYPE_STRING); return ctx->kv[key_id].value.str.data; } -int gguf_get_n_tensors(const struct gguf_context * ctx) { +int lm_gguf_get_n_tensors(const struct lm_gguf_context * ctx) { return ctx->header.n_tensors; } -int gguf_find_tensor(const struct gguf_context * ctx, const char * name) { +int lm_gguf_find_tensor(const struct lm_gguf_context * ctx, const char * name) { // return -1 if tensor not found int tensorfound = -1; - const int n_tensors = gguf_get_n_tensors(ctx); + const int n_tensors = lm_gguf_get_n_tensors(ctx); for (int i = 0; i < n_tensors; ++i) { - if (strcmp(name, gguf_get_tensor_name(ctx, i)) == 0) { + if (strcmp(name, lm_gguf_get_tensor_name(ctx, i)) == 0) { tensorfound = i; break; } @@ -21454,24 +21454,24 @@ int gguf_find_tensor(const struct gguf_context * ctx, const char * name) { return tensorfound; } -size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i) { +size_t lm_gguf_get_tensor_offset(const struct lm_gguf_context * ctx, int i) { return ctx->infos[i].offset; } -char * gguf_get_tensor_name(const struct gguf_context * ctx, int i) { +char * lm_gguf_get_tensor_name(const struct lm_gguf_context * ctx, int i) { return ctx->infos[i].name.data; } // returns the index -static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) { - const int idx = gguf_find_key(ctx, key); +static int lm_gguf_get_or_add_key(struct lm_gguf_context * ctx, const char * key) { + const int idx = lm_gguf_find_key(ctx, key); if (idx >= 0) { return idx; } - const int n_kv = gguf_get_n_kv(ctx); + const int n_kv = lm_gguf_get_n_kv(ctx); - ctx->kv = realloc(ctx->kv, (n_kv + 1) * sizeof(struct gguf_kv)); + ctx->kv = realloc(ctx->kv, (n_kv + 1) * sizeof(struct lm_gguf_kv)); ctx->kv[n_kv].key.n = strlen(key); ctx->kv[n_kv].key.data = strdup(key); ctx->header.n_kv++; @@ -21479,156 +21479,156 @@ static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) { return n_kv; } -void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) { - const int idx = gguf_get_or_add_key(ctx, key); +void lm_gguf_set_val_u8(struct lm_gguf_context * ctx, const char * key, uint8_t val) { + const int idx = lm_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_UINT8; + ctx->kv[idx].type = LM_GGUF_TYPE_UINT8; ctx->kv[idx].value.uint8 = val; } -void gguf_set_val_i8(struct gguf_context * ctx, const char * key, int8_t val) { - const int idx = gguf_get_or_add_key(ctx, key); +void lm_gguf_set_val_i8(struct lm_gguf_context * ctx, const char * key, int8_t val) { + const int idx = lm_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_INT8; + ctx->kv[idx].type = LM_GGUF_TYPE_INT8; ctx->kv[idx].value.int8 = val; } -void gguf_set_val_u16(struct gguf_context * ctx, const char * key, uint16_t val) { - const int idx = gguf_get_or_add_key(ctx, key); +void lm_gguf_set_val_u16(struct lm_gguf_context * ctx, const char * key, uint16_t val) { + const int idx = lm_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_UINT16; + ctx->kv[idx].type = LM_GGUF_TYPE_UINT16; ctx->kv[idx].value.uint16 = val; } -void gguf_set_val_i16(struct gguf_context * ctx, const char * key, int16_t val) { - const int idx = gguf_get_or_add_key(ctx, key); +void lm_gguf_set_val_i16(struct lm_gguf_context * ctx, const char * key, int16_t val) { + const int idx = lm_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_INT16; + ctx->kv[idx].type = LM_GGUF_TYPE_INT16; ctx->kv[idx].value.int16 = val; } -void gguf_set_val_u32(struct gguf_context * ctx, const char * key, uint32_t val) { - const int idx = gguf_get_or_add_key(ctx, key); +void lm_gguf_set_val_u32(struct lm_gguf_context * ctx, const char * key, uint32_t val) { + const int idx = lm_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_UINT32; + ctx->kv[idx].type = LM_GGUF_TYPE_UINT32; ctx->kv[idx].value.uint32 = val; } -void gguf_set_val_i32(struct gguf_context * ctx, const char * key, int32_t val) { - const int idx = gguf_get_or_add_key(ctx, key); +void lm_gguf_set_val_i32(struct lm_gguf_context * ctx, const char * key, int32_t val) { + const int idx = lm_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_INT32; + ctx->kv[idx].type = LM_GGUF_TYPE_INT32; ctx->kv[idx].value.int32 = val; } -void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) { - const int idx = gguf_get_or_add_key(ctx, key); +void lm_gguf_set_val_f32(struct lm_gguf_context * ctx, const char * key, float val) { + const int idx = lm_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_FLOAT32; + ctx->kv[idx].type = LM_GGUF_TYPE_FLOAT32; ctx->kv[idx].value.float32 = val; } -void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) { - const int idx = gguf_get_or_add_key(ctx, key); +void lm_gguf_set_val_u64(struct lm_gguf_context * ctx, const char * key, uint64_t val) { + const int idx = lm_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_UINT64; + ctx->kv[idx].type = LM_GGUF_TYPE_UINT64; ctx->kv[idx].value.uint64 = val; } -void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) { - const int idx = gguf_get_or_add_key(ctx, key); +void lm_gguf_set_val_i64(struct lm_gguf_context * ctx, const char * key, int64_t val) { + const int idx = lm_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_INT64; + ctx->kv[idx].type = LM_GGUF_TYPE_INT64; ctx->kv[idx].value.int64 = val; } -void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) { - const int idx = gguf_get_or_add_key(ctx, key); +void lm_gguf_set_val_f64(struct lm_gguf_context * ctx, const char * key, double val) { + const int idx = lm_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_FLOAT64; + ctx->kv[idx].type = LM_GGUF_TYPE_FLOAT64; ctx->kv[idx].value.float64 = val; } -void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) { - const int idx = gguf_get_or_add_key(ctx, key); +void lm_gguf_set_val_bool(struct lm_gguf_context * ctx, const char * key, bool val) { + const int idx = lm_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_BOOL; + ctx->kv[idx].type = LM_GGUF_TYPE_BOOL; ctx->kv[idx].value.bool_ = val; } -void gguf_set_val_str(struct gguf_context * ctx, const char * key, const char * val) { - const int idx = gguf_get_or_add_key(ctx, key); +void lm_gguf_set_val_str(struct lm_gguf_context * ctx, const char * key, const char * val) { + const int idx = lm_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_STRING; + ctx->kv[idx].type = LM_GGUF_TYPE_STRING; ctx->kv[idx].value.str.n = strlen(val); ctx->kv[idx].value.str.data = strdup(val); } -void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n) { - const int idx = gguf_get_or_add_key(ctx, key); +void lm_gguf_set_arr_data(struct lm_gguf_context * ctx, const char * key, enum lm_gguf_type type, const void * data, int n) { + const int idx = lm_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_ARRAY; + ctx->kv[idx].type = LM_GGUF_TYPE_ARRAY; ctx->kv[idx].value.arr.type = type; ctx->kv[idx].value.arr.n = n; - ctx->kv[idx].value.arr.data = malloc(n*GGUF_TYPE_SIZE[type]); - memcpy(ctx->kv[idx].value.arr.data, data, n*GGUF_TYPE_SIZE[type]); + ctx->kv[idx].value.arr.data = malloc(n*LM_GGUF_TYPE_SIZE[type]); + memcpy(ctx->kv[idx].value.arr.data, data, n*LM_GGUF_TYPE_SIZE[type]); } -void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, int n) { - const int idx = gguf_get_or_add_key(ctx, key); +void lm_gguf_set_arr_str(struct lm_gguf_context * ctx, const char * key, const char ** data, int n) { + const int idx = lm_gguf_get_or_add_key(ctx, key); - ctx->kv[idx].type = GGUF_TYPE_ARRAY; - ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING; + ctx->kv[idx].type = LM_GGUF_TYPE_ARRAY; + ctx->kv[idx].value.arr.type = LM_GGUF_TYPE_STRING; ctx->kv[idx].value.arr.n = n; - ctx->kv[idx].value.arr.data = malloc(n*sizeof(struct gguf_str)); + ctx->kv[idx].value.arr.data = malloc(n*sizeof(struct lm_gguf_str)); for (int i = 0; i < n; i++) { - struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i]; + struct lm_gguf_str * str = &((struct lm_gguf_str *)ctx->kv[idx].value.arr.data)[i]; str->n = strlen(data[i]); str->data = strdup(data[i]); } } // set or add KV pairs from another context -void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) { +void lm_gguf_set_kv(struct lm_gguf_context * ctx, struct lm_gguf_context * src) { for (uint32_t i = 0; i < src->header.n_kv; i++) { switch (src->kv[i].type) { - case GGUF_TYPE_UINT8: gguf_set_val_u8 (ctx, src->kv[i].key.data, src->kv[i].value.uint8); break; - case GGUF_TYPE_INT8: gguf_set_val_i8 (ctx, src->kv[i].key.data, src->kv[i].value.int8); break; - case GGUF_TYPE_UINT16: gguf_set_val_u16 (ctx, src->kv[i].key.data, src->kv[i].value.uint16); break; - case GGUF_TYPE_INT16: gguf_set_val_i16 (ctx, src->kv[i].key.data, src->kv[i].value.int16); break; - case GGUF_TYPE_UINT32: gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break; - case GGUF_TYPE_INT32: gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break; - case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break; - case GGUF_TYPE_UINT64: gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64); break; - case GGUF_TYPE_INT64: gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64); break; - case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break; - case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break; - case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break; - case GGUF_TYPE_ARRAY: + case LM_GGUF_TYPE_UINT8: lm_gguf_set_val_u8 (ctx, src->kv[i].key.data, src->kv[i].value.uint8); break; + case LM_GGUF_TYPE_INT8: lm_gguf_set_val_i8 (ctx, src->kv[i].key.data, src->kv[i].value.int8); break; + case LM_GGUF_TYPE_UINT16: lm_gguf_set_val_u16 (ctx, src->kv[i].key.data, src->kv[i].value.uint16); break; + case LM_GGUF_TYPE_INT16: lm_gguf_set_val_i16 (ctx, src->kv[i].key.data, src->kv[i].value.int16); break; + case LM_GGUF_TYPE_UINT32: lm_gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break; + case LM_GGUF_TYPE_INT32: lm_gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break; + case LM_GGUF_TYPE_FLOAT32: lm_gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break; + case LM_GGUF_TYPE_UINT64: lm_gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64); break; + case LM_GGUF_TYPE_INT64: lm_gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64); break; + case LM_GGUF_TYPE_FLOAT64: lm_gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break; + case LM_GGUF_TYPE_BOOL: lm_gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break; + case LM_GGUF_TYPE_STRING: lm_gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break; + case LM_GGUF_TYPE_ARRAY: { - if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) { + if (src->kv[i].value.arr.type == LM_GGUF_TYPE_STRING) { const char ** data = malloc(src->kv[i].value.arr.n*sizeof(char *)); for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) { - data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data; + data[j] = ((struct lm_gguf_str *)src->kv[i].value.arr.data)[j].data; } - gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n); + lm_gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n); free(data); - } else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) { + } else if (src->kv[i].value.arr.type == LM_GGUF_TYPE_ARRAY) { LM_GGML_ASSERT(false && "nested arrays not supported"); } else { - gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n); + lm_gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n); } } break; - case GGUF_TYPE_COUNT: LM_GGML_ASSERT(false && "invalid type"); break; + case LM_GGUF_TYPE_COUNT: LM_GGML_ASSERT(false && "invalid type"); break; } } } -void gguf_add_tensor( - struct gguf_context * ctx, +void lm_gguf_add_tensor( + struct lm_gguf_context * ctx, const struct lm_ggml_tensor * tensor) { const int idx = ctx->header.n_tensors; - ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info)); + ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct lm_gguf_tensor_info)); ctx->infos[idx].name.n = strlen(tensor->name); ctx->infos[idx].name.data = strdup(tensor->name); @@ -21654,8 +21654,8 @@ void gguf_add_tensor( ctx->header.n_tensors++; } -void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum lm_ggml_type type) { - const int idx = gguf_find_tensor(ctx, name); +void lm_gguf_set_tensor_type(struct lm_gguf_context * ctx, const char * name, enum lm_ggml_type type) { + const int idx = lm_gguf_find_tensor(ctx, name); if (idx < 0) { LM_GGML_ASSERT(false && "tensor not found"); } @@ -21663,8 +21663,8 @@ void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum lm_ ctx->infos[idx].type = type; } -void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size) { - const int idx = gguf_find_tensor(ctx, name); +void lm_gguf_set_tensor_data(struct lm_gguf_context * ctx, const char * name, const void * data, size_t size) { + const int idx = lm_gguf_find_tensor(ctx, name); if (idx < 0) { LM_GGML_ASSERT(false && "tensor not found"); } @@ -21678,23 +21678,23 @@ void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const vo } } -//static void gguf_fwrite_str(FILE * file, const struct gguf_str * val) { +//static void lm_gguf_fwrite_str(FILE * file, const struct lm_gguf_str * val) { // fwrite(&val->n, sizeof(val->n), 1, file); // fwrite(val->data, sizeof(char), val->n, file); //} // -//static void gguf_fwrite_el(FILE * file, const void * val, size_t size) { +//static void lm_gguf_fwrite_el(FILE * file, const void * val, size_t size) { // fwrite(val, sizeof(char), size, file); //} -struct gguf_buf { +struct lm_gguf_buf { void * data; size_t size; size_t offset; }; -static struct gguf_buf gguf_buf_init(size_t size) { - struct gguf_buf buf = { +static struct lm_gguf_buf lm_gguf_buf_init(size_t size) { + struct lm_gguf_buf buf = { /*buf.data =*/ size == 0 ? NULL : malloc(size), /*buf.size =*/ size, /*buf.offset =*/ 0, @@ -21703,13 +21703,13 @@ static struct gguf_buf gguf_buf_init(size_t size) { return buf; } -static void gguf_buf_free(struct gguf_buf buf) { +static void lm_gguf_buf_free(struct lm_gguf_buf buf) { if (buf.data) { free(buf.data); } } -static void gguf_buf_grow(struct gguf_buf * buf, size_t size) { +static void lm_gguf_buf_grow(struct lm_gguf_buf * buf, size_t size) { if (buf->offset + size > buf->size) { buf->size = 1.5*(buf->offset + size); if (buf->data) { @@ -21718,8 +21718,8 @@ static void gguf_buf_grow(struct gguf_buf * buf, size_t size) { } } -static void gguf_bwrite_str(struct gguf_buf * buf, const struct gguf_str * val) { - gguf_buf_grow(buf, sizeof(val->n) + val->n); +static void lm_gguf_bwrite_str(struct lm_gguf_buf * buf, const struct lm_gguf_str * val) { + lm_gguf_buf_grow(buf, sizeof(val->n) + val->n); if (buf->data) { memcpy((char *) buf->data + buf->offset, &val->n, sizeof(val->n)); @@ -21732,8 +21732,8 @@ static void gguf_bwrite_str(struct gguf_buf * buf, const struct gguf_str * val) buf->offset += val->n; } -static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_size) { - gguf_buf_grow(buf, el_size); +static void lm_gguf_bwrite_el(struct lm_gguf_buf * buf, const void * val, size_t el_size) { + lm_gguf_buf_grow(buf, el_size); if (buf->data) { memcpy((char *) buf->data + buf->offset, val, el_size); @@ -21741,78 +21741,78 @@ static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_si buf->offset += el_size; } -static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) { +static void lm_gguf_write_to_buf(const struct lm_gguf_context * ctx, struct lm_gguf_buf * buf, bool only_meta) { // write header - gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic)); - gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version)); - gguf_bwrite_el(buf, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors)); - gguf_bwrite_el(buf, &ctx->header.n_kv, sizeof(ctx->header.n_kv)); + lm_gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic)); + lm_gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version)); + lm_gguf_bwrite_el(buf, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors)); + lm_gguf_bwrite_el(buf, &ctx->header.n_kv, sizeof(ctx->header.n_kv)); // write key-value pairs for (uint32_t i = 0; i < ctx->header.n_kv; ++i) { - struct gguf_kv * kv = &ctx->kv[i]; + struct lm_gguf_kv * kv = &ctx->kv[i]; - gguf_bwrite_str(buf, &kv->key); - gguf_bwrite_el (buf, &kv->type, sizeof(kv->type)); + lm_gguf_bwrite_str(buf, &kv->key); + lm_gguf_bwrite_el (buf, &kv->type, sizeof(kv->type)); switch (kv->type) { - case GGUF_TYPE_UINT8: gguf_bwrite_el( buf, &kv->value.uint8, sizeof(kv->value.uint8) ); break; - case GGUF_TYPE_INT8: gguf_bwrite_el (buf, &kv->value.int8, sizeof(kv->value.int8) ); break; - case GGUF_TYPE_UINT16: gguf_bwrite_el (buf, &kv->value.uint16, sizeof(kv->value.uint16) ); break; - case GGUF_TYPE_INT16: gguf_bwrite_el (buf, &kv->value.int16, sizeof(kv->value.int16) ); break; - case GGUF_TYPE_UINT32: gguf_bwrite_el (buf, &kv->value.uint32, sizeof(kv->value.uint32) ); break; - case GGUF_TYPE_INT32: gguf_bwrite_el (buf, &kv->value.int32, sizeof(kv->value.int32) ); break; - case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break; - case GGUF_TYPE_UINT64: gguf_bwrite_el (buf, &kv->value.uint64, sizeof(kv->value.uint64) ); break; - case GGUF_TYPE_INT64: gguf_bwrite_el (buf, &kv->value.int64, sizeof(kv->value.int64) ); break; - case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break; - case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break; - case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break; - case GGUF_TYPE_ARRAY: + case LM_GGUF_TYPE_UINT8: lm_gguf_bwrite_el( buf, &kv->value.uint8, sizeof(kv->value.uint8) ); break; + case LM_GGUF_TYPE_INT8: lm_gguf_bwrite_el (buf, &kv->value.int8, sizeof(kv->value.int8) ); break; + case LM_GGUF_TYPE_UINT16: lm_gguf_bwrite_el (buf, &kv->value.uint16, sizeof(kv->value.uint16) ); break; + case LM_GGUF_TYPE_INT16: lm_gguf_bwrite_el (buf, &kv->value.int16, sizeof(kv->value.int16) ); break; + case LM_GGUF_TYPE_UINT32: lm_gguf_bwrite_el (buf, &kv->value.uint32, sizeof(kv->value.uint32) ); break; + case LM_GGUF_TYPE_INT32: lm_gguf_bwrite_el (buf, &kv->value.int32, sizeof(kv->value.int32) ); break; + case LM_GGUF_TYPE_FLOAT32: lm_gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break; + case LM_GGUF_TYPE_UINT64: lm_gguf_bwrite_el (buf, &kv->value.uint64, sizeof(kv->value.uint64) ); break; + case LM_GGUF_TYPE_INT64: lm_gguf_bwrite_el (buf, &kv->value.int64, sizeof(kv->value.int64) ); break; + case LM_GGUF_TYPE_FLOAT64: lm_gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break; + case LM_GGUF_TYPE_BOOL: lm_gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break; + case LM_GGUF_TYPE_STRING: lm_gguf_bwrite_str(buf, &kv->value.str ); break; + case LM_GGUF_TYPE_ARRAY: { - gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type)); - gguf_bwrite_el(buf, &kv->value.arr.n, sizeof(kv->value.arr.n) ); + lm_gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type)); + lm_gguf_bwrite_el(buf, &kv->value.arr.n, sizeof(kv->value.arr.n) ); switch (kv->value.arr.type) { - case GGUF_TYPE_UINT8: - case GGUF_TYPE_INT8: - case GGUF_TYPE_UINT16: - case GGUF_TYPE_INT16: - case GGUF_TYPE_UINT32: - case GGUF_TYPE_INT32: - case GGUF_TYPE_FLOAT32: - case GGUF_TYPE_UINT64: - case GGUF_TYPE_INT64: - case GGUF_TYPE_FLOAT64: - case GGUF_TYPE_BOOL: + case LM_GGUF_TYPE_UINT8: + case LM_GGUF_TYPE_INT8: + case LM_GGUF_TYPE_UINT16: + case LM_GGUF_TYPE_INT16: + case LM_GGUF_TYPE_UINT32: + case LM_GGUF_TYPE_INT32: + case LM_GGUF_TYPE_FLOAT32: + case LM_GGUF_TYPE_UINT64: + case LM_GGUF_TYPE_INT64: + case LM_GGUF_TYPE_FLOAT64: + case LM_GGUF_TYPE_BOOL: { - gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]); + lm_gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * LM_GGUF_TYPE_SIZE[kv->value.arr.type]); } break; - case GGUF_TYPE_STRING: + case LM_GGUF_TYPE_STRING: { for (uint32_t j = 0; j < kv->value.arr.n; ++j) { - gguf_bwrite_str(buf, &((struct gguf_str *) kv->value.arr.data)[j]); + lm_gguf_bwrite_str(buf, &((struct lm_gguf_str *) kv->value.arr.data)[j]); } } break; - case GGUF_TYPE_ARRAY: - case GGUF_TYPE_COUNT: LM_GGML_ASSERT(false && "invalid type"); break; + case LM_GGUF_TYPE_ARRAY: + case LM_GGUF_TYPE_COUNT: LM_GGML_ASSERT(false && "invalid type"); break; } } break; - case GGUF_TYPE_COUNT: LM_GGML_ASSERT(false && "invalid type"); + case LM_GGUF_TYPE_COUNT: LM_GGML_ASSERT(false && "invalid type"); } } // write tensor infos for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { - struct gguf_tensor_info * info = &ctx->infos[i]; + struct lm_gguf_tensor_info * info = &ctx->infos[i]; - gguf_bwrite_str(buf, &info->name); - gguf_bwrite_el (buf, &info->n_dims, sizeof(info->n_dims)); + lm_gguf_bwrite_str(buf, &info->name); + lm_gguf_bwrite_el (buf, &info->n_dims, sizeof(info->n_dims)); for (uint32_t j = 0; j < info->n_dims; ++j) { - gguf_bwrite_el(buf, &info->ne[j], sizeof(info->ne[j])); + lm_gguf_bwrite_el(buf, &info->ne[j], sizeof(info->ne[j])); } - gguf_bwrite_el(buf, &info->type, sizeof(info->type)); - gguf_bwrite_el(buf, &info->offset, sizeof(info->offset)); + lm_gguf_bwrite_el(buf, &info->type, sizeof(info->type)); + lm_gguf_bwrite_el(buf, &info->offset, sizeof(info->offset)); } // we require the data section to be aligned, so take into account any padding @@ -21823,7 +21823,7 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * if (offset_pad != offset) { uint8_t pad = 0; for (size_t i = 0; i < offset_pad - offset; ++i) { - gguf_bwrite_el(buf, &pad, sizeof(pad)); + lm_gguf_bwrite_el(buf, &pad, sizeof(pad)); } } } @@ -21836,17 +21836,17 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * // write tensor data for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { - struct gguf_tensor_info * info = &ctx->infos[i]; + struct lm_gguf_tensor_info * info = &ctx->infos[i]; const size_t size = info->size; const size_t size_pad = LM_GGML_PAD(size, ctx->alignment); - gguf_bwrite_el(buf, info->data, size); + lm_gguf_bwrite_el(buf, info->data, size); if (size_pad != size) { uint8_t pad = 0; for (size_t j = 0; j < size_pad - size; ++j) { - gguf_bwrite_el(buf, &pad, sizeof(pad)); + lm_gguf_bwrite_el(buf, &pad, sizeof(pad)); } } @@ -21856,40 +21856,40 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * } } -void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) { +void lm_gguf_write_to_file(const struct lm_gguf_context * ctx, const char * fname, bool only_meta) { FILE * file = fopen(fname, "wb"); if (!file) { LM_GGML_ASSERT(false && "failed to open file for writing"); } - struct gguf_buf buf = gguf_buf_init(16*1024); + struct lm_gguf_buf buf = lm_gguf_buf_init(16*1024); - gguf_write_to_buf(ctx, &buf, only_meta); + lm_gguf_write_to_buf(ctx, &buf, only_meta); fwrite(buf.data, 1, buf.offset, file); - gguf_buf_free(buf); + lm_gguf_buf_free(buf); fclose(file); } -size_t gguf_get_meta_size(const struct gguf_context * ctx) { +size_t lm_gguf_get_meta_size(const struct lm_gguf_context * ctx) { // no allocs - only compute size - struct gguf_buf buf = gguf_buf_init(0); + struct lm_gguf_buf buf = lm_gguf_buf_init(0); - gguf_write_to_buf(ctx, &buf, true); + lm_gguf_write_to_buf(ctx, &buf, true); return buf.offset; } -void gguf_get_meta_data(const struct gguf_context * ctx, void * data) { - struct gguf_buf buf = gguf_buf_init(16*1024); +void lm_gguf_get_meta_data(const struct lm_gguf_context * ctx, void * data) { + struct lm_gguf_buf buf = lm_gguf_buf_init(16*1024); - gguf_write_to_buf(ctx, &buf, true); + lm_gguf_write_to_buf(ctx, &buf, true); memcpy(data, buf.data, buf.offset); - gguf_buf_free(buf); + lm_gguf_buf_free(buf); } //////////////////////////////////////////////////////////////////////////////// diff --git a/cpp/ggml.h b/cpp/ggml.h index b937b70..b252464 100644 --- a/cpp/ggml.h +++ b/cpp/ggml.h @@ -231,11 +231,11 @@ #define LM_GGML_EXIT_SUCCESS 0 #define LM_GGML_EXIT_ABORTED 1 -#define GGUF_MAGIC "GGUF" +#define LM_GGUF_MAGIC "GGUF" -#define GGUF_VERSION 3 +#define LM_GGUF_VERSION 3 -#define GGUF_DEFAULT_ALIGNMENT 32 +#define LM_GGUF_DEFAULT_ALIGNMENT 32 #define LM_GGML_UNUSED(x) (void)(x) @@ -1941,122 +1941,122 @@ extern "C" { // gguf // - enum gguf_type { - GGUF_TYPE_UINT8 = 0, - GGUF_TYPE_INT8 = 1, - GGUF_TYPE_UINT16 = 2, - GGUF_TYPE_INT16 = 3, - GGUF_TYPE_UINT32 = 4, - GGUF_TYPE_INT32 = 5, - GGUF_TYPE_FLOAT32 = 6, - GGUF_TYPE_BOOL = 7, - GGUF_TYPE_STRING = 8, - GGUF_TYPE_ARRAY = 9, - GGUF_TYPE_UINT64 = 10, - GGUF_TYPE_INT64 = 11, - GGUF_TYPE_FLOAT64 = 12, - GGUF_TYPE_COUNT, // marks the end of the enum + enum lm_gguf_type { + LM_GGUF_TYPE_UINT8 = 0, + LM_GGUF_TYPE_INT8 = 1, + LM_GGUF_TYPE_UINT16 = 2, + LM_GGUF_TYPE_INT16 = 3, + LM_GGUF_TYPE_UINT32 = 4, + LM_GGUF_TYPE_INT32 = 5, + LM_GGUF_TYPE_FLOAT32 = 6, + LM_GGUF_TYPE_BOOL = 7, + LM_GGUF_TYPE_STRING = 8, + LM_GGUF_TYPE_ARRAY = 9, + LM_GGUF_TYPE_UINT64 = 10, + LM_GGUF_TYPE_INT64 = 11, + LM_GGUF_TYPE_FLOAT64 = 12, + LM_GGUF_TYPE_COUNT, // marks the end of the enum }; - struct gguf_context; + struct lm_gguf_context; - struct gguf_init_params { + struct lm_gguf_init_params { bool no_alloc; // if not NULL, create a lm_ggml_context and allocate the tensor data in it struct lm_ggml_context ** ctx; }; - LM_GGML_API struct gguf_context * gguf_init_empty(void); - LM_GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params); - //LM_GGML_API struct gguf_context * gguf_init_from_buffer(..); + LM_GGML_API struct lm_gguf_context * lm_gguf_init_empty(void); + LM_GGML_API struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gguf_init_params params); + //LM_GGML_API struct lm_gguf_context * lm_gguf_init_from_buffer(..); - LM_GGML_API void gguf_free(struct gguf_context * ctx); + LM_GGML_API void lm_gguf_free(struct lm_gguf_context * ctx); - LM_GGML_API const char * gguf_type_name(enum gguf_type type); + LM_GGML_API const char * lm_gguf_type_name(enum lm_gguf_type type); - LM_GGML_API int gguf_get_version (const struct gguf_context * ctx); - LM_GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx); - LM_GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx); - LM_GGML_API void * gguf_get_data (const struct gguf_context * ctx); + LM_GGML_API int lm_gguf_get_version (const struct lm_gguf_context * ctx); + LM_GGML_API size_t lm_gguf_get_alignment (const struct lm_gguf_context * ctx); + LM_GGML_API size_t lm_gguf_get_data_offset(const struct lm_gguf_context * ctx); + LM_GGML_API void * lm_gguf_get_data (const struct lm_gguf_context * ctx); - LM_GGML_API int gguf_get_n_kv(const struct gguf_context * ctx); - LM_GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key); - LM_GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id); + LM_GGML_API int lm_gguf_get_n_kv(const struct lm_gguf_context * ctx); + LM_GGML_API int lm_gguf_find_key(const struct lm_gguf_context * ctx, const char * key); + LM_GGML_API const char * lm_gguf_get_key (const struct lm_gguf_context * ctx, int key_id); - LM_GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id); - LM_GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id); + LM_GGML_API enum lm_gguf_type lm_gguf_get_kv_type (const struct lm_gguf_context * ctx, int key_id); + LM_GGML_API enum lm_gguf_type lm_gguf_get_arr_type(const struct lm_gguf_context * ctx, int key_id); // will abort if the wrong type is used for the key - LM_GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int key_id); - LM_GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int key_id); - LM_GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int key_id); - LM_GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int key_id); - LM_GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int key_id); - LM_GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int key_id); - LM_GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int key_id); - LM_GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int key_id); - LM_GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int key_id); - LM_GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int key_id); - LM_GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id); - LM_GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id); - LM_GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int key_id); - LM_GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id); - LM_GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i); - - LM_GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx); - LM_GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name); - LM_GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i); - LM_GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i); + LM_GGML_API uint8_t lm_gguf_get_val_u8 (const struct lm_gguf_context * ctx, int key_id); + LM_GGML_API int8_t lm_gguf_get_val_i8 (const struct lm_gguf_context * ctx, int key_id); + LM_GGML_API uint16_t lm_gguf_get_val_u16 (const struct lm_gguf_context * ctx, int key_id); + LM_GGML_API int16_t lm_gguf_get_val_i16 (const struct lm_gguf_context * ctx, int key_id); + LM_GGML_API uint32_t lm_gguf_get_val_u32 (const struct lm_gguf_context * ctx, int key_id); + LM_GGML_API int32_t lm_gguf_get_val_i32 (const struct lm_gguf_context * ctx, int key_id); + LM_GGML_API float lm_gguf_get_val_f32 (const struct lm_gguf_context * ctx, int key_id); + LM_GGML_API uint64_t lm_gguf_get_val_u64 (const struct lm_gguf_context * ctx, int key_id); + LM_GGML_API int64_t lm_gguf_get_val_i64 (const struct lm_gguf_context * ctx, int key_id); + LM_GGML_API double lm_gguf_get_val_f64 (const struct lm_gguf_context * ctx, int key_id); + LM_GGML_API bool lm_gguf_get_val_bool(const struct lm_gguf_context * ctx, int key_id); + LM_GGML_API const char * lm_gguf_get_val_str (const struct lm_gguf_context * ctx, int key_id); + LM_GGML_API int lm_gguf_get_arr_n (const struct lm_gguf_context * ctx, int key_id); + LM_GGML_API const void * lm_gguf_get_arr_data(const struct lm_gguf_context * ctx, int key_id); + LM_GGML_API const char * lm_gguf_get_arr_str (const struct lm_gguf_context * ctx, int key_id, int i); + + LM_GGML_API int lm_gguf_get_n_tensors (const struct lm_gguf_context * ctx); + LM_GGML_API int lm_gguf_find_tensor (const struct lm_gguf_context * ctx, const char * name); + LM_GGML_API size_t lm_gguf_get_tensor_offset(const struct lm_gguf_context * ctx, int i); + LM_GGML_API char * lm_gguf_get_tensor_name (const struct lm_gguf_context * ctx, int i); // overrides existing values or adds a new one - LM_GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val); - LM_GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val); - LM_GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val); - LM_GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val); - LM_GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val); - LM_GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val); - LM_GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val); - LM_GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val); - LM_GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val); - LM_GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val); - LM_GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val); - LM_GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val); - LM_GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n); - LM_GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n); + LM_GGML_API void lm_gguf_set_val_u8 (struct lm_gguf_context * ctx, const char * key, uint8_t val); + LM_GGML_API void lm_gguf_set_val_i8 (struct lm_gguf_context * ctx, const char * key, int8_t val); + LM_GGML_API void lm_gguf_set_val_u16 (struct lm_gguf_context * ctx, const char * key, uint16_t val); + LM_GGML_API void lm_gguf_set_val_i16 (struct lm_gguf_context * ctx, const char * key, int16_t val); + LM_GGML_API void lm_gguf_set_val_u32 (struct lm_gguf_context * ctx, const char * key, uint32_t val); + LM_GGML_API void lm_gguf_set_val_i32 (struct lm_gguf_context * ctx, const char * key, int32_t val); + LM_GGML_API void lm_gguf_set_val_f32 (struct lm_gguf_context * ctx, const char * key, float val); + LM_GGML_API void lm_gguf_set_val_u64 (struct lm_gguf_context * ctx, const char * key, uint64_t val); + LM_GGML_API void lm_gguf_set_val_i64 (struct lm_gguf_context * ctx, const char * key, int64_t val); + LM_GGML_API void lm_gguf_set_val_f64 (struct lm_gguf_context * ctx, const char * key, double val); + LM_GGML_API void lm_gguf_set_val_bool(struct lm_gguf_context * ctx, const char * key, bool val); + LM_GGML_API void lm_gguf_set_val_str (struct lm_gguf_context * ctx, const char * key, const char * val); + LM_GGML_API void lm_gguf_set_arr_data(struct lm_gguf_context * ctx, const char * key, enum lm_gguf_type type, const void * data, int n); + LM_GGML_API void lm_gguf_set_arr_str (struct lm_gguf_context * ctx, const char * key, const char ** data, int n); // set or add KV pairs from another context - LM_GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src); + LM_GGML_API void lm_gguf_set_kv(struct lm_gguf_context * ctx, struct lm_gguf_context * src); // manage tensor info - LM_GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct lm_ggml_tensor * tensor); - LM_GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum lm_ggml_type type); - LM_GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size); + LM_GGML_API void lm_gguf_add_tensor(struct lm_gguf_context * ctx, const struct lm_ggml_tensor * tensor); + LM_GGML_API void lm_gguf_set_tensor_type(struct lm_gguf_context * ctx, const char * name, enum lm_ggml_type type); + LM_GGML_API void lm_gguf_set_tensor_data(struct lm_gguf_context * ctx, const char * name, const void * data, size_t size); // writing gguf files can be done in 2 ways: // - // - write the entire gguf_context to a binary file in a single pass: + // - write the entire lm_gguf_context to a binary file in a single pass: // - // gguf_write_to_file(ctx, fname); + // lm_gguf_write_to_file(ctx, fname); // // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data: // // FILE * f = fopen(fname, "wb"); - // fseek(f, gguf_get_meta_size(ctx), SEEK_SET); + // fseek(f, lm_gguf_get_meta_size(ctx), SEEK_SET); // fwrite(f, ...); - // void * data = gguf_meta_get_meta_data(ctx); + // void * data = lm_gguf_meta_get_meta_data(ctx); // fseek(f, 0, SEEK_SET); - // fwrite(f, data, gguf_get_meta_size(ctx)); + // fwrite(f, data, lm_gguf_get_meta_size(ctx)); // free(data); // fclose(f); // // write the entire context to a binary file - LM_GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta); + LM_GGML_API void lm_gguf_write_to_file(const struct lm_gguf_context * ctx, const char * fname, bool only_meta); // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding - LM_GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx); - LM_GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data); + LM_GGML_API size_t lm_gguf_get_meta_size(const struct lm_gguf_context * ctx); + LM_GGML_API void lm_gguf_get_meta_data(const struct lm_gguf_context * ctx, void * data); // // system info diff --git a/cpp/llama.cpp b/cpp/llama.cpp index e22bb7c..3e214da 100644 --- a/cpp/llama.cpp +++ b/cpp/llama.cpp @@ -549,14 +549,14 @@ struct LLM_TN { // gguf helpers // -#define GGUF_GET_KEY(ctx, dst, func, type, req, key) \ +#define LM_GGUF_GET_KEY(ctx, dst, func, type, req, key) \ do { \ const std::string skey(key); \ - const int kid = gguf_find_key(ctx, skey.c_str()); \ + const int kid = lm_gguf_find_key(ctx, skey.c_str()); \ if (kid >= 0) { \ - enum gguf_type ktype = gguf_get_kv_type(ctx, kid); \ + enum lm_gguf_type ktype = lm_gguf_get_kv_type(ctx, kid); \ if (ktype != (type)) { \ - throw std::runtime_error(format("key %s has wrong type: %s", skey.c_str(), gguf_type_name(ktype))); \ + throw std::runtime_error(format("key %s has wrong type: %s", skey.c_str(), lm_gguf_type_name(ktype))); \ } \ (dst) = func(ctx, kid); \ } else if (req) { \ @@ -1587,14 +1587,14 @@ static void llama_kv_cache_seq_shift( // enum llama_fver { - GGUF_FILE_VERSION_V1 = 1, - GGUF_FILE_VERSION_V2 = 2, + LM_GGUF_FILE_VERSION_V1 = 1, + LM_GGUF_FILE_VERSION_V2 = 2, }; static const char * llama_file_version_name(llama_fver version) { switch (version) { - case GGUF_FILE_VERSION_V1: return "GGUF V1 (support until nov 2023)"; - case GGUF_FILE_VERSION_V2: return "GGUF V2 (latest)"; + case LM_GGUF_FILE_VERSION_V1: return "GGUF V1 (support until nov 2023)"; + case LM_GGUF_FILE_VERSION_V2: return "GGUF V2 (latest)"; } return "unknown"; @@ -1634,27 +1634,27 @@ struct llama_model_loader { std::unique_ptr mapping; - struct gguf_context * ctx_gguf = NULL; + struct lm_gguf_context * ctx_gguf = NULL; struct lm_ggml_context * ctx_meta = NULL; llama_model_loader(const std::string & fname, bool use_mmap) : file(fname.c_str(), "rb") { - struct gguf_init_params params = { + struct lm_gguf_init_params params = { /*.no_alloc = */ true, /*.ctx = */ &ctx_meta, }; - ctx_gguf = gguf_init_from_file(fname.c_str(), params); + ctx_gguf = lm_gguf_init_from_file(fname.c_str(), params); if (!ctx_gguf) { throw std::runtime_error(format("%s: failed to load model from %s\n", __func__, fname.c_str())); } - n_kv = gguf_get_n_kv(ctx_gguf); - n_tensors = gguf_get_n_tensors(ctx_gguf); + n_kv = lm_gguf_get_n_kv(ctx_gguf); + n_tensors = lm_gguf_get_n_tensors(ctx_gguf); - fver = (enum llama_fver ) gguf_get_version(ctx_gguf); + fver = (enum llama_fver ) lm_gguf_get_version(ctx_gguf); for (int i = 0; i < n_tensors; i++) { - const char * name = gguf_get_tensor_name(ctx_gguf, i); + const char * name = lm_gguf_get_tensor_name(ctx_gguf, i); struct lm_ggml_tensor * t = lm_ggml_get_tensor(ctx_meta, name); n_elements += lm_ggml_nelements(t); n_bytes += lm_ggml_nbytes(t); @@ -1672,7 +1672,7 @@ struct llama_model_loader { enum lm_ggml_type type_max = LM_GGML_TYPE_F32; for (int i = 0; i < n_tensors; i++) { - const char * name = gguf_get_tensor_name(ctx_gguf, i); + const char * name = lm_gguf_get_tensor_name(ctx_gguf, i); struct lm_ggml_tensor * meta = lm_ggml_get_tensor(ctx_meta, name); n_type[meta->type]++; @@ -1709,17 +1709,17 @@ struct llama_model_loader { ftype = (llama_ftype) (ftype | LLAMA_FTYPE_GUESSED); { - const int kid = gguf_find_key(ctx_gguf, "general.file_type"); + const int kid = lm_gguf_find_key(ctx_gguf, "general.file_type"); if (kid >= 0) { - ftype = (llama_ftype) gguf_get_val_u32(ctx_gguf, kid); + ftype = (llama_ftype) lm_gguf_get_val_u32(ctx_gguf, kid); } } for (int i = 0; i < n_kv; i++) { - const char * name = gguf_get_key(ctx_gguf, i); - const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i); + const char * name = lm_gguf_get_key(ctx_gguf, i); + const enum lm_gguf_type type = lm_gguf_get_kv_type(ctx_gguf, i); - LLAMA_LOG_INFO("%s: - kv %3d: %42s %-8s\n", __func__, i, name, gguf_type_name(type)); + LLAMA_LOG_INFO("%s: - kv %3d: %42s %-8s\n", __func__, i, name, lm_gguf_type_name(type)); } // print type counts @@ -1742,7 +1742,7 @@ struct llama_model_loader { ~llama_model_loader() { if (ctx_gguf) { - gguf_free(ctx_gguf); + lm_gguf_free(ctx_gguf); } if (ctx_meta) { lm_ggml_free(ctx_meta); @@ -1753,7 +1753,7 @@ struct llama_model_loader { const auto kv = LLM_KV(LLM_ARCH_UNKNOWN); std::string arch_name; - GGUF_GET_KEY(ctx_gguf, arch_name, gguf_get_val_str, GGUF_TYPE_STRING, false, kv(LLM_KV_GENERAL_ARCHITECTURE)); + LM_GGUF_GET_KEY(ctx_gguf, arch_name, lm_gguf_get_val_str, LM_GGUF_TYPE_STRING, false, kv(LLM_KV_GENERAL_ARCHITECTURE)); return arch_name; } @@ -1765,7 +1765,7 @@ struct llama_model_loader { } const char * get_tensor_name(int i) const { - return gguf_get_tensor_name(ctx_gguf, i); + return lm_gguf_get_tensor_name(ctx_gguf, i); } struct lm_ggml_tensor * get_tensor_meta(int i) const { @@ -1835,13 +1835,13 @@ struct llama_model_loader { } size_t file_offset(const char * name) const { - const int idx = gguf_find_tensor(ctx_gguf, name); + const int idx = lm_gguf_find_tensor(ctx_gguf, name); if (idx < 0) { throw std::runtime_error(format("%s: tensor '%s' not found in the file", __func__, name)); } - return gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, idx); + return lm_gguf_get_data_offset(ctx_gguf) + lm_gguf_get_tensor_offset(ctx_gguf, idx); } void load_data_for(struct lm_ggml_tensor * cur) const { @@ -1860,8 +1860,8 @@ struct llama_model_loader { size_t size_lock = 0; size_t size_pref = 0; // prefetch - for (int i = 0; i < gguf_get_n_tensors(ctx_gguf); i++) { - struct lm_ggml_tensor * cur = lm_ggml_get_tensor(ctx, gguf_get_tensor_name(ctx_gguf, i)); + for (int i = 0; i < lm_gguf_get_n_tensors(ctx_gguf); i++) { + struct lm_ggml_tensor * cur = lm_ggml_get_tensor(ctx, lm_gguf_get_tensor_name(ctx_gguf, i)); size_data += lm_ggml_nbytes(cur); if (cur->backend == LM_GGML_BACKEND_CPU) { size_pref += lm_ggml_nbytes(cur); @@ -1876,8 +1876,8 @@ struct llama_model_loader { } size_t done_size = 0; - for (int i = 0; i < gguf_get_n_tensors(ctx_gguf); i++) { - struct lm_ggml_tensor * cur = lm_ggml_get_tensor(ctx, gguf_get_tensor_name(ctx_gguf, i)); + for (int i = 0; i < lm_gguf_get_n_tensors(ctx_gguf); i++) { + struct lm_ggml_tensor * cur = lm_ggml_get_tensor(ctx, lm_gguf_get_tensor_name(ctx_gguf, i)); LM_GGML_ASSERT(cur); // unused tensors should have been caught by load_data already if (progress_callback) { @@ -2002,41 +2002,41 @@ static void llm_load_arch(llama_model_loader & ml, llama_model & model) { static void llm_load_hparams( llama_model_loader & ml, llama_model & model) { - struct gguf_context * ctx = ml.ctx_gguf; + struct lm_gguf_context * ctx = ml.ctx_gguf; const auto kv = LLM_KV(model.arch); auto & hparams = model.hparams; // get general kv - GGUF_GET_KEY(ctx, model.name, gguf_get_val_str, GGUF_TYPE_STRING, false, kv(LLM_KV_GENERAL_NAME)); + LM_GGUF_GET_KEY(ctx, model.name, lm_gguf_get_val_str, LM_GGUF_TYPE_STRING, false, kv(LLM_KV_GENERAL_NAME)); // get hparams kv - GGUF_GET_KEY(ctx, hparams.n_vocab, gguf_get_arr_n, GGUF_TYPE_ARRAY, true, kv(LLM_KV_TOKENIZER_LIST)); - GGUF_GET_KEY(ctx, hparams.n_ctx_train, gguf_get_val_u32, GGUF_TYPE_UINT32, true, kv(LLM_KV_CONTEXT_LENGTH)); - GGUF_GET_KEY(ctx, hparams.n_embd, gguf_get_val_u32, GGUF_TYPE_UINT32, true, kv(LLM_KV_EMBEDDING_LENGTH)); - GGUF_GET_KEY(ctx, hparams.n_ff, gguf_get_val_u32, GGUF_TYPE_UINT32, true, kv(LLM_KV_FEED_FORWARD_LENGTH)); - GGUF_GET_KEY(ctx, hparams.n_head, gguf_get_val_u32, GGUF_TYPE_UINT32, true, kv(LLM_KV_ATTENTION_HEAD_COUNT)); - GGUF_GET_KEY(ctx, hparams.n_layer, gguf_get_val_u32, GGUF_TYPE_UINT32, true, kv(LLM_KV_BLOCK_COUNT)); + LM_GGUF_GET_KEY(ctx, hparams.n_vocab, lm_gguf_get_arr_n, LM_GGUF_TYPE_ARRAY, true, kv(LLM_KV_TOKENIZER_LIST)); + LM_GGUF_GET_KEY(ctx, hparams.n_ctx_train, lm_gguf_get_val_u32, LM_GGUF_TYPE_UINT32, true, kv(LLM_KV_CONTEXT_LENGTH)); + LM_GGUF_GET_KEY(ctx, hparams.n_embd, lm_gguf_get_val_u32, LM_GGUF_TYPE_UINT32, true, kv(LLM_KV_EMBEDDING_LENGTH)); + LM_GGUF_GET_KEY(ctx, hparams.n_ff, lm_gguf_get_val_u32, LM_GGUF_TYPE_UINT32, true, kv(LLM_KV_FEED_FORWARD_LENGTH)); + LM_GGUF_GET_KEY(ctx, hparams.n_head, lm_gguf_get_val_u32, LM_GGUF_TYPE_UINT32, true, kv(LLM_KV_ATTENTION_HEAD_COUNT)); + LM_GGUF_GET_KEY(ctx, hparams.n_layer, lm_gguf_get_val_u32, LM_GGUF_TYPE_UINT32, true, kv(LLM_KV_BLOCK_COUNT)); // n_head_kv is optional, default to n_head hparams.n_head_kv = hparams.n_head; - GGUF_GET_KEY(ctx, hparams.n_head_kv, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_ATTENTION_HEAD_COUNT_KV)); + LM_GGUF_GET_KEY(ctx, hparams.n_head_kv, lm_gguf_get_val_u32, LM_GGUF_TYPE_UINT32, false, kv(LLM_KV_ATTENTION_HEAD_COUNT_KV)); // rope_freq_base (optional) hparams.rope_freq_base_train = 10000.0f; - GGUF_GET_KEY(ctx, hparams.rope_freq_base_train, gguf_get_val_f32, GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ROPE_FREQ_BASE)); + LM_GGUF_GET_KEY(ctx, hparams.rope_freq_base_train, lm_gguf_get_val_f32, LM_GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ROPE_FREQ_BASE)); // rope_freq_scale (inverse of the kv) is optional float ropescale = 1.0f; - GGUF_GET_KEY(ctx, ropescale, gguf_get_val_f32, GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ROPE_SCALE_LINEAR)); + LM_GGUF_GET_KEY(ctx, ropescale, lm_gguf_get_val_f32, LM_GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ROPE_SCALE_LINEAR)); hparams.rope_freq_scale_train = 1.0f/ropescale; // sanity check for n_rot (optional) { hparams.n_rot = hparams.n_embd / hparams.n_head; - GGUF_GET_KEY(ctx, hparams.n_rot, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_ROPE_DIMENSION_COUNT)); + LM_GGUF_GET_KEY(ctx, hparams.n_rot, lm_gguf_get_val_u32, LM_GGUF_TYPE_UINT32, false, kv(LLM_KV_ROPE_DIMENSION_COUNT)); if (model.arch == LLM_ARCH_LLAMA || model.arch == LLM_ARCH_FALCON) { if (hparams.n_rot != hparams.n_embd / hparams.n_head) { @@ -2051,7 +2051,7 @@ static void llm_load_hparams( switch (model.arch) { case LLM_ARCH_LLAMA: { - GGUF_GET_KEY(ctx, hparams.f_norm_rms_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS)); + LM_GGUF_GET_KEY(ctx, hparams.f_norm_rms_eps, lm_gguf_get_val_f32, LM_GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS)); switch (hparams.n_layer) { case 26: model.type = e_model::MODEL_3B; break; @@ -2065,7 +2065,7 @@ static void llm_load_hparams( } break; case LLM_ARCH_FALCON: { - GGUF_GET_KEY(ctx, hparams.f_norm_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_EPS)); + LM_GGUF_GET_KEY(ctx, hparams.f_norm_eps, lm_gguf_get_val_f32, LM_GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_EPS)); switch (hparams.n_layer) { case 32: model.type = e_model::MODEL_7B; break; @@ -2075,7 +2075,7 @@ static void llm_load_hparams( } break; case LLM_ARCH_BAICHUAN: { - GGUF_GET_KEY(ctx, hparams.f_norm_rms_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS)); + LM_GGUF_GET_KEY(ctx, hparams.f_norm_rms_eps, lm_gguf_get_val_f32, LM_GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS)); switch (hparams.n_layer) { case 32: model.type = e_model::MODEL_7B; break; case 40: model.type = e_model::MODEL_13B; break; @@ -2084,7 +2084,7 @@ static void llm_load_hparams( } break; case LLM_ARCH_STARCODER: { - GGUF_GET_KEY(ctx, hparams.f_norm_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_EPS)); + LM_GGUF_GET_KEY(ctx, hparams.f_norm_eps, lm_gguf_get_val_f32, LM_GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_EPS)); switch (hparams.n_layer) { case 24: model.type = e_model::MODEL_1B; break; case 36: model.type = e_model::MODEL_3B; break; @@ -2095,7 +2095,7 @@ static void llm_load_hparams( } break; case LLM_ARCH_PERSIMMON: { - GGUF_GET_KEY(ctx, hparams.f_norm_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_EPS)); + LM_GGUF_GET_KEY(ctx, hparams.f_norm_eps, lm_gguf_get_val_f32, LM_GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_EPS)); switch (hparams.n_layer) { case 36: model.type = e_model::MODEL_8B; break; default: model.type = e_model::MODEL_UNKNOWN; @@ -2103,7 +2103,7 @@ static void llm_load_hparams( } break; case LLM_ARCH_REFACT: { - GGUF_GET_KEY(ctx, hparams.f_norm_rms_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS)); + LM_GGUF_GET_KEY(ctx, hparams.f_norm_rms_eps, lm_gguf_get_val_f32, LM_GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS)); switch (hparams.n_layer) { case 32: model.type = e_model::MODEL_1B; break; default: model.type = e_model::MODEL_UNKNOWN; @@ -2111,7 +2111,7 @@ static void llm_load_hparams( } break; case LLM_ARCH_BLOOM: { - GGUF_GET_KEY(ctx, hparams.f_norm_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_EPS)); + LM_GGUF_GET_KEY(ctx, hparams.f_norm_eps, lm_gguf_get_val_f32, LM_GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_EPS)); switch (hparams.n_layer) { case 24: model.type = e_model::MODEL_1B; break; @@ -2126,9 +2126,9 @@ static void llm_load_hparams( { hparams.f_clamp_kqv = 0.0f; - GGUF_GET_KEY(ctx, hparams.f_norm_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_EPS)); - GGUF_GET_KEY(ctx, hparams.f_clamp_kqv, gguf_get_val_f32, GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ATTENTION_CLAMP_KQV)); - GGUF_GET_KEY(ctx, hparams.f_max_alibi_bias, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_MAX_ALIBI_BIAS)); + LM_GGUF_GET_KEY(ctx, hparams.f_norm_eps, lm_gguf_get_val_f32, LM_GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_EPS)); + LM_GGUF_GET_KEY(ctx, hparams.f_clamp_kqv, lm_gguf_get_val_f32, LM_GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ATTENTION_CLAMP_KQV)); + LM_GGUF_GET_KEY(ctx, hparams.f_max_alibi_bias, lm_gguf_get_val_f32, LM_GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_MAX_ALIBI_BIAS)); switch (hparams.n_layer) { case 32: model.type = e_model::MODEL_7B; break; @@ -2151,32 +2151,32 @@ static void llm_load_vocab( llama_model & model) { auto & vocab = model.vocab; - struct gguf_context * ctx = ml.ctx_gguf; + struct lm_gguf_context * ctx = ml.ctx_gguf; const auto kv = LLM_KV(model.arch); - const int token_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_LIST).c_str()); + const int token_idx = lm_gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_LIST).c_str()); if (token_idx == -1) { throw std::runtime_error("cannot find tokenizer vocab in model file\n"); } const float * scores = nullptr; - const int score_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_SCORES).c_str()); + const int score_idx = lm_gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_SCORES).c_str()); if (score_idx != -1) { - scores = (const float * ) gguf_get_arr_data(ctx, score_idx); + scores = (const float * ) lm_gguf_get_arr_data(ctx, score_idx); } const int * toktypes = nullptr; - const int toktype_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_TOKEN_TYPE).c_str()); + const int toktype_idx = lm_gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_TOKEN_TYPE).c_str()); if (toktype_idx != -1) { - toktypes = (const int * ) gguf_get_arr_data(ctx, toktype_idx); + toktypes = (const int * ) lm_gguf_get_arr_data(ctx, toktype_idx); } // determine vocab type { std::string tokenizer_name; - GGUF_GET_KEY(ctx, tokenizer_name, gguf_get_val_str, GGUF_TYPE_STRING, true, kv(LLM_KV_TOKENIZER_MODEL)); + LM_GGUF_GET_KEY(ctx, tokenizer_name, lm_gguf_get_val_str, LM_GGUF_TYPE_STRING, true, kv(LLM_KV_TOKENIZER_MODEL)); if (tokenizer_name == "llama") { vocab.type = LLAMA_VOCAB_TYPE_SPM; @@ -2191,15 +2191,15 @@ static void llm_load_vocab( vocab.type = LLAMA_VOCAB_TYPE_BPE; // read bpe merges and populate bpe ranks - const int merges_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_MERGES).c_str()); + const int merges_keyidx = lm_gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_MERGES).c_str()); if (merges_keyidx == -1) { throw std::runtime_error("cannot find tokenizer merges in model file\n"); } - const int n_merges = gguf_get_arr_n(ctx, merges_keyidx); + const int n_merges = lm_gguf_get_arr_n(ctx, merges_keyidx); for (int i = 0; i < n_merges; i++) { - const std::string word = gguf_get_arr_str(ctx, merges_keyidx, i); + const std::string word = lm_gguf_get_arr_str(ctx, merges_keyidx, i); LM_GGML_ASSERT(codepoints_from_utf8(word).size() > 0); std::string first; @@ -2229,12 +2229,12 @@ static void llm_load_vocab( } } - const uint32_t n_vocab = gguf_get_arr_n(ctx, token_idx); + const uint32_t n_vocab = lm_gguf_get_arr_n(ctx, token_idx); vocab.id_to_token.resize(n_vocab); for (uint32_t i = 0; i < n_vocab; i++) { - std::string word = gguf_get_arr_str(ctx, token_idx, i); + std::string word = lm_gguf_get_arr_str(ctx, token_idx, i); LM_GGML_ASSERT(codepoints_from_utf8(word).size() > 0); vocab.token_to_id[word] = i; @@ -2268,7 +2268,7 @@ static void llm_load_vocab( const std::string & key = kv(std::get<0>(it)); int32_t & id = std::get<1>(it), old_id = id; - GGUF_GET_KEY(ctx, id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, key); + LM_GGUF_GET_KEY(ctx, id, lm_gguf_get_val_u32, LM_GGUF_TYPE_UINT32, false, key); // Must be >= -1 and < vocab size. Since the key is unsigned, -1 // can only come from the default value, so there's no point in // validating that. @@ -2678,7 +2678,7 @@ static void llm_load_tensors( layer.attn_norm = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, backend); layer.attn_norm_b = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_NORM, "bias", i), {n_embd}, backend); - if (gguf_find_tensor(ml.ctx_gguf, tn(LLM_TENSOR_ATTN_NORM_2, "weight", i).c_str()) >= 0) { + if (lm_gguf_find_tensor(ml.ctx_gguf, tn(LLM_TENSOR_ATTN_NORM_2, "weight", i).c_str()) >= 0) { layer.attn_norm_2 = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_NORM_2, "weight", i), {n_embd}, backend); layer.attn_norm_2_b = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_NORM_2, "bias", i), {n_embd}, backend); @@ -8219,13 +8219,13 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s ftype = model.ftype; } - const size_t align = GGUF_DEFAULT_ALIGNMENT; - struct gguf_context * ctx_out = gguf_init_empty(); + const size_t align = LM_GGUF_DEFAULT_ALIGNMENT; + struct lm_gguf_context * ctx_out = lm_gguf_init_empty(); // copy the KV pairs from the input file - gguf_set_kv (ctx_out, ml.ctx_gguf); - gguf_set_val_u32(ctx_out, "general.quantization_version", LM_GGML_QNT_VERSION); - gguf_set_val_u32(ctx_out, "general.file_type", ftype); + lm_gguf_set_kv (ctx_out, ml.ctx_gguf); + lm_gguf_set_val_u32(ctx_out, "general.quantization_version", LM_GGML_QNT_VERSION); + lm_gguf_set_val_u32(ctx_out, "general.file_type", ftype); #ifdef LM_GGML_USE_K_QUANTS int n_attention_wv = 0; @@ -8270,13 +8270,13 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s // populate the original tensors so we get an initial meta data for (int i = 0; i < ml.n_tensors; ++i) { struct lm_ggml_tensor * meta = ml.get_tensor_meta(i); - gguf_add_tensor(ctx_out, meta); + lm_gguf_add_tensor(ctx_out, meta); } std::ofstream fout(fname_out, std::ios::binary); fout.exceptions(std::ofstream::failbit); // fail fast on write errors - const size_t meta_size = gguf_get_meta_size(ctx_out); + const size_t meta_size = lm_gguf_get_meta_size(ctx_out); LLAMA_LOG_INFO("%s: meta size = %zu bytes\n", __func__, meta_size); @@ -8407,8 +8407,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s total_size_new += new_size; // update the gguf meta data as we go - gguf_set_tensor_type(ctx_out, name.c_str(), new_type); - gguf_set_tensor_data(ctx_out, name.c_str(), new_data, new_size); + lm_gguf_set_tensor_type(ctx_out, name.c_str(), new_type); + lm_gguf_set_tensor_data(ctx_out, name.c_str(), new_data, new_size); // write tensor data + padding fout.write((const char *) new_data, new_size); @@ -8418,14 +8418,14 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s // go back to beginning of file and write the updated meta data { fout.seekp(0); - std::vector data(gguf_get_meta_size(ctx_out)); - gguf_get_meta_data(ctx_out, data.data()); + std::vector data(lm_gguf_get_meta_size(ctx_out)); + lm_gguf_get_meta_data(ctx_out, data.data()); fout.write((const char *) data.data(), data.size()); } fout.close(); - gguf_free(ctx_out); + lm_gguf_free(ctx_out); LLAMA_LOG_INFO("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0); LLAMA_LOG_INFO("%s: quant size = %8.2f MB\n", __func__, total_size_new/1024.0/1024.0); @@ -8625,10 +8625,10 @@ static int llama_apply_lora_from_file_internal( lm_ggml_tensor * base_t; if (ml) { - struct gguf_context * ctx_gguf = ml->ctx_gguf; + struct lm_gguf_context * ctx_gguf = ml->ctx_gguf; // load from base model - if (gguf_find_tensor(ctx_gguf, base_name.c_str()) < 0) { + if (lm_gguf_find_tensor(ctx_gguf, base_name.c_str()) < 0) { // TODO: throw LLAMA_LOG_ERROR("%s: error: tensor '%s' not found in base model\n", __func__, base_name.c_str()); return 1; diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh index 6a310a0..130694a 100755 --- a/scripts/bootstrap.sh +++ b/scripts/bootstrap.sh @@ -54,10 +54,14 @@ for file in "${files[@]}"; do if [ "$OS" = "Darwin" ]; then sed -i '' 's/GGML_/LM_GGML_/g' $file sed -i '' 's/ggml_/lm_ggml_/g' $file + sed -i '' 's/GGUF_/LM_GGUF_/g' $file + sed -i '' 's/gguf_/lm_gguf_/g' $file sed -i '' 's/GGMLMetalClass/LMGGMLMetalClass/g' $file else sed -i 's/GGML_/LM_GGML_/g' $file sed -i 's/ggml_/lm_ggml_/g' $file + sed -i 's/GGUF_/LM_GGUF_/g' $file + sed -i 's/gguf_/lm_gguf_/g' $file sed -i 's/GGMLMetalClass/LMGGMLMetalClass/g' $file fi done