From 165e5c08ac7cb9b5ba5d04fd374bf08019a6e01a Mon Sep 17 00:00:00 2001
From: Hans
Date: Mon, 7 Oct 2024 17:45:55 +0800
Subject: [PATCH] feat: bump `llama.cpp` to `b3889`

---
 CMakeLists.txt                |  9 ---------
 patches/llama.patch           | 22 ----------------------
 src/LlamaCompletionWorker.cpp | 12 ++++++------
 src/LlamaContext.cpp          | 16 +++++++---------
 src/common.hpp                |  3 ++-
 src/llama.cpp                 |  2 +-
 6 files changed, 16 insertions(+), 48 deletions(-)
 delete mode 100644 patches/llama.patch

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e3b7aa6..347dd90 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -62,15 +62,6 @@ if (VULKAN_SDK)
   find_package(Vulkan REQUIRED)
 endif()
 
-find_program(PATCH patch REQUIRED)
-
-add_custom_target(
-  patch ALL
-  COMMAND ${PATCH} -p1 -N < ${CMAKE_SOURCE_DIR}/patches/llama.patch || true
-  WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/llama.cpp
-  COMMENT "Applying patches"
-)
-
 set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries")
 
 add_subdirectory("src/llama.cpp")
diff --git a/patches/llama.patch b/patches/llama.patch
deleted file mode 100644
index f141092..0000000
--- a/patches/llama.patch
+++ /dev/null
@@ -1,22 +0,0 @@
-diff --git a/ggml/src/ggml-vulkan.cpp b/ggml/src/ggml-vulkan.cpp
-index fa68360b..f9ff7b5d 100644
---- a/ggml/src/ggml-vulkan.cpp
-+++ b/ggml/src/ggml-vulkan.cpp
-@@ -617,9 +617,15 @@ static void ggml_vk_create_pipeline(vk_device& device, vk_pipeline& pipeline, co
-         vk::PipelineCreateFlags(),
-         pipeline_shader_create_info,
-         pipeline->layout);
--    pipeline->pipeline = device->device.createComputePipeline(VK_NULL_HANDLE, compute_pipeline_create_info).value;
- 
--    device->pipelines.push_back(pipeline);
-+    try {
-+        pipeline->pipeline = device->device.createComputePipeline(VK_NULL_HANDLE, compute_pipeline_create_info).value;
-+        device->pipelines.push_back(pipeline);
-+    } catch(vk::UnknownError const&) {
-+        VK_LOG_DEBUG("Failed to create pipeline " << name);
-+        ggml_vk_destroy_pipeline(device->device, pipeline);
-+        pipeline.reset();
-+    }
- }
- 
- static void ggml_vk_destroy_pipeline(vk::Device& device, vk_pipeline& pipeline) {
diff --git a/src/LlamaCompletionWorker.cpp b/src/LlamaCompletionWorker.cpp
index 9455d33..d23f2fa 100644
--- a/src/LlamaCompletionWorker.cpp
+++ b/src/LlamaCompletionWorker.cpp
@@ -59,13 +59,13 @@ void LlamaCompletionWorker::Execute() {
   size_t n_cur = 0;
   size_t n_input = 0;
   const auto model = _sess->model();
-  const bool add_bos = llama_should_add_bos_token(model);
+  const bool add_bos = llama_add_bos_token(model);
   auto ctx = _sess->context();
 
-  llama_set_rng_seed(ctx, _params.seed);
+  auto sparams = llama_sampler_chain_default_params();
 
-  LlamaCppSampling sampling{llama_sampling_init(_params.sparams),
-                            llama_sampling_free};
+  LlamaCppSampling sampling{gpt_sampler_init(model, _params.sparams),
+                            gpt_sampler_free};
 
   std::vector<llama_token> prompt_tokens =
       ::llama_tokenize(ctx, _params.prompt, add_bos);
@@ -109,8 +109,8 @@ void LlamaCompletionWorker::Execute() {
     }
     // sample the next token
     const llama_token new_token_id =
-        llama_sampling_sample(sampling.get(), ctx, nullptr);
-    llama_sampling_accept(sampling.get(), ctx, new_token_id, true);
+        gpt_sampler_sample(sampling.get(), ctx, -1);
+    gpt_sampler_accept(sampling.get(), new_token_id, true);
     // prepare the next batch
     embd->emplace_back(new_token_id);
     auto token = llama_token_to_piece(ctx, new_token_id);
diff --git a/src/LlamaContext.cpp b/src/LlamaContext.cpp
index ec3fd33..c4a5b9f 100644
--- a/src/LlamaContext.cpp
+++ b/src/LlamaContext.cpp
@@ -75,7 +75,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   params.embedding = get_option(options, "embedding", false);
   params.n_ctx = get_option(options, "n_ctx", 512);
   params.n_batch = get_option(options, "n_batch", 2048);
-  params.n_threads =
+  params.cpuparams.n_threads =
       get_option(options, "n_threads", cpu_get_num_math() / 2);
   params.n_gpu_layers = get_option(options, "n_gpu_layers", -1);
   params.use_mlock = get_option(options, "use_mlock", false);
@@ -86,16 +86,14 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   llama_backend_init();
   llama_numa_init(params.numa);
 
-  llama_model *model;
-  llama_context *ctx;
-  std::tie(model, ctx) = llama_init_from_gpt_params(params);
+  auto result = llama_init_from_gpt_params(params);
 
-  if (model == nullptr || ctx == nullptr) {
+  if (result.model == nullptr || result.context == nullptr) {
     Napi::TypeError::New(env, "Failed to load model")
         .ThrowAsJavaScriptException();
   }
 
-  _sess = std::make_shared<LlamaSession>(model, ctx, params);
+  _sess = std::make_shared<LlamaSession>(result.model, result.context, params);
 
   _info = gpt_params_get_system_info(params);
 }
@@ -167,11 +165,11 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   params.sparams.penalty_present =
      get_option(options, "penalty_present", 0.00f);
   params.sparams.penalize_nl = get_option(options, "penalize_nl", false);
-  params.sparams.typical_p = get_option(options, "typical_p", 1.00f);
-  params.ignore_eos = get_option(options, "ignore_eos", false);
+  params.sparams.typ_p = get_option(options, "typical_p", 1.00f);
+  params.sparams.ignore_eos = get_option(options, "ignore_eos", false);
   params.sparams.grammar = get_option(options, "grammar", "");
   params.n_keep = get_option(options, "n_keep", 0);
-  params.seed = get_option(options, "seed", LLAMA_DEFAULT_SEED);
+  params.sparams.seed = get_option(options, "seed", LLAMA_DEFAULT_SEED);
   std::vector<std::string> stop_words;
   if (options.Has("stop") && options.Get("stop").IsArray()) {
     auto stop_words_array = options.Get("stop").As<Napi::Array>();
diff --git a/src/common.hpp b/src/common.hpp
index 06e2f5b..68a878f 100644
--- a/src/common.hpp
+++ b/src/common.hpp
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "common/common.h"
+#include "common/sampling.h"
 #include "llama.h"
 #include
 #include
@@ -12,7 +13,7 @@
 typedef std::unique_ptr<llama_model, decltype(&llama_free_model)>
     LlamaCppModel;
 typedef std::unique_ptr<llama_context, decltype(&llama_free)> LlamaCppContext;
-typedef std::unique_ptr<llama_sampling_context, decltype(&llama_sampling_free)>
+typedef std::unique_ptr<gpt_sampler, decltype(&gpt_sampler_free)>
     LlamaCppSampling;
 typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;
diff --git a/src/llama.cpp b/src/llama.cpp
index 4730fac..b6d6c52 160000
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -1 +1 @@
-Subproject commit 4730faca618ff9cee0780580145e3cbe86f24876
+Subproject commit b6d6c5289f1c9c677657c380591201ddb210b649
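
For reference, a minimal standalone sketch of the `gpt_sampler` flow this patch migrates to. It only strings together calls that already appear in the diff above (`gpt_sampler_init`, `gpt_sampler_sample`, `gpt_sampler_accept`, `gpt_sampler_free`) against the llama.cpp `b3889` headers; the `sample_next_token` helper name is illustrative and not part of this tree, and in the real worker the sampler persists for the whole completion rather than per token.

// Sketch only: assumes a loaded model/context and a populated gpt_params.
#include "common/common.h"
#include "common/sampling.h"
#include "llama.h"

static llama_token sample_next_token(llama_model *model, llama_context *ctx,
                                     const gpt_params &params) {
  gpt_sampler *smpl = gpt_sampler_init(model, params.sparams);
  // idx = -1 samples from the logits of the last decoded token.
  const llama_token id = gpt_sampler_sample(smpl, ctx, -1);
  // Unlike the old llama_sampling_accept, the context is no longer passed.
  gpt_sampler_accept(smpl, id, /* accept_grammar = */ true);
  gpt_sampler_free(smpl);
  return id;
}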