diff --git a/.github/workflows/release-build.yml b/.github/workflows/build-release.yml similarity index 95% rename from .github/workflows/release-build.yml rename to .github/workflows/build-release.yml index 8820c99..c3ea1f2 100644 --- a/.github/workflows/release-build.yml +++ b/.github/workflows/build-release.yml @@ -1,6 +1,5 @@ -name: Release build +name: Build release artifacts on: - push: workflow_dispatch: inputs: upload-artifacts: @@ -43,8 +42,6 @@ jobs: - name: Install dependencies run: yarn install - name: Prepare & build - env: - CMAKE_BUILD_PARALLEL_LEVEL: 4 run: | bash ./scripts/prepare-linux.sh bash ./scripts/build-linux.sh @@ -52,7 +49,7 @@ jobs: if: inputs.upload-artifacts == 'true' uses: actions/upload-artifact@v4 with: - name: bin-linux-arm64 + name: bin-linux-x86_64 path: bin retention-days: ${{ inputs.artifacts-retention-days }} @@ -83,8 +80,6 @@ jobs: with: platforms: linux/arm64 - name: Prepare & build - env: - CMAKE_BUILD_PARALLEL_LEVEL: 4 run: | docker run --rm \ -e CMAKE_BUILD_PARALLEL_LEVEL=${{ env.CMAKE_BUILD_PARALLEL_LEVEL }} \ @@ -127,14 +122,12 @@ jobs: - name: Install dependencies run: yarn install - name: Build (macOS) - env: - CMAKE_BUILD_PARALLEL_LEVEL: 4 run: bash ./scripts/build-macos.sh - name: Upload build artifacts if: inputs.upload-artifacts == 'true' uses: actions/upload-artifact@v4 with: - name: bin-macos + name: bin-${{ matrix.os }} path: bin retention-days: ${{ inputs.artifacts-retention-days }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index eb7ecf8..d5baf89 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -7,7 +7,7 @@ on: jobs: build: - uses: ./.github/workflows/release-build.yml + uses: ./.github/workflows/build-release.yml with: upload-artifacts: true artifacts-retention-days: 3 diff --git a/CMakeLists.txt b/CMakeLists.txt index c213ed1..2110b6e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ project (llama-node) set(CMAKE_CXX_STANDARD 
17) execute_process(COMMAND - git apply ${CMAKE_CURRENT_SOURCE_DIR}/scripts/ggml-cpu-CMakeLists.txt.patch + git apply ${CMAKE_CURRENT_SOURCE_DIR}/scripts/llama.cpp.patch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) diff --git a/lib/binding.ts b/lib/binding.ts index bfadf84..55f010c 100644 --- a/lib/binding.ts +++ b/lib/binding.ts @@ -8,12 +8,15 @@ export type ChatMessage = { export type LlamaModelOptions = { model: string embedding?: boolean + embd_normalize?: number + pooling_type?: number n_ctx?: number n_batch?: number n_threads?: number n_gpu_layers?: number use_mlock?: boolean use_mmap?: boolean + vocab_only?: boolean } export type LlamaCompletionOptions = { @@ -23,7 +26,21 @@ export type LlamaCompletionOptions = { temperature?: number top_k?: number top_p?: number - repetition_penalty?: number + min_p?: number + mirostat?: number + mirostat_tau?: number + mirostat_eta?: number + penalty_last_n?: number + penalty_repeat?: number + penalty_freq?: number + penalty_present?: number + typ_p?: number + xtc_threshold?: number + xtc_probability?: number + dry_multiplier?: number + dry_base?: number + dry_allowed_length?: number + dry_penalty_last_n?: number n_predict?: number max_length?: number max_tokens?: number @@ -37,6 +54,16 @@ export type LlamaCompletionResult = { tokens_predicted: number tokens_evaluated: number truncated: boolean + timings: { + prompt_n: number + prompt_ms: number + prompt_per_token_ms: number + prompt_per_second: number + predicted_n: number + predicted_ms: number + predicted_per_token_ms: number + predicted_per_second: number + } } export type LlamaCompletionToken = { @@ -54,6 +81,7 @@ export type EmbeddingResult = { export interface LlamaContext { new (options: LlamaModelOptions): LlamaContext getSystemInfo(): string + getModelInfo(): object getFormattedChat(messages: ChatMessage[]): string completion(options: LlamaCompletionOptions, callback?: (token: LlamaCompletionToken) => void): Promise stopCompletion(): void diff --git 
a/package.json b/package.json index 7eaa0d7..f9a4086 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "@fugood/llama.node", "access": "public", - "version": "0.3.3", + "version": "0.3.4", "description": "Llama.cpp for Node.js", "main": "lib/index.js", "scripts": { diff --git a/scripts/llama.cpp.patch b/scripts/llama.cpp.patch new file mode 100644 index 0000000..0667a20 --- /dev/null +++ b/scripts/llama.cpp.patch @@ -0,0 +1,37 @@ +diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +index 683b90af..e1bf104c 100644 +--- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt ++++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +@@ -80,7 +80,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name) + message(STATUS "ARM detected") + + if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang") +- message(FATAL_ERROR "MSVC is not supported for ARM, use clang") ++ list(APPEND ARCH_FLAGS /arch:armv8.7) + else() + check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E) + if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "") +diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h +index 1d2bd932..b5007c66 100644 +--- a/src/llama.cpp/common/common.h ++++ b/src/llama.cpp/common/common.h +@@ -183,6 +183,7 @@ struct common_params_vocoder { + }; + + struct common_params { ++ bool vocab_only = false; + int32_t n_predict = -1; // new tokens to predict + int32_t n_ctx = 4096; // context size + int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS) +diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp +index 20be9291..1bedc55d 100644 +--- a/src/llama.cpp/common/common.cpp ++++ b/src/llama.cpp/common/common.cpp +@@ -1017,6 +1017,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) { + if (params.n_gpu_layers != -1) { + mparams.n_gpu_layers = params.n_gpu_layers; + } ++ 
mparams.vocab_only = params.vocab_only; + mparams.rpc_servers = params.rpc_servers.c_str(); + mparams.main_gpu = params.main_gpu; + mparams.split_mode = params.split_mode; diff --git a/src/EmbeddingWorker.cpp b/src/EmbeddingWorker.cpp index a76368a..0ad8d35 100644 --- a/src/EmbeddingWorker.cpp +++ b/src/EmbeddingWorker.cpp @@ -2,8 +2,8 @@ #include "LlamaContext.h" EmbeddingWorker::EmbeddingWorker(const Napi::CallbackInfo &info, - LlamaSessionPtr &sess, std::string text) - : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text) {} + LlamaSessionPtr &sess, std::string text, common_params &params) + : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text), _params(params) {} void EmbeddingWorker::Execute() { llama_kv_cache_clear(_sess->context()); @@ -14,20 +14,30 @@ void EmbeddingWorker::Execute() { } const int n_embd = llama_n_embd(_sess->model()); do { + auto ctx = _sess->context(); int ret = - llama_decode(_sess->context(), + llama_decode(ctx, llama_batch_get_one(tokens.data(), tokens.size())); if (ret < 0) { SetError("Failed to inference, code: " + std::to_string(ret)); break; } - const float *embd = llama_get_embeddings_seq(_sess->context(), 0); + + float *embd; + const enum llama_pooling_type pooling_type = llama_pooling_type(ctx); + if (pooling_type == LLAMA_POOLING_TYPE_NONE) { + embd = llama_get_embeddings(ctx); + } else { + embd = llama_get_embeddings_seq(ctx, 0); + } if (embd == nullptr) { SetError("Failed to get embeddings"); break; } _result.embedding.resize(n_embd); - memcpy(_result.embedding.data(), embd, n_embd * sizeof(float)); + std::vector embedding(embd, embd + n_embd), out(embd, embd + n_embd); + common_embd_normalize(embedding.data(), out.data(), n_embd, _params.embd_normalize); + memcpy(_result.embedding.data(), out.data(), n_embd * sizeof(float)); } while (false); } diff --git a/src/EmbeddingWorker.h b/src/EmbeddingWorker.h index a21ffbf..3bc989d 100644 --- a/src/EmbeddingWorker.h +++ b/src/EmbeddingWorker.h @@
-9,7 +9,7 @@ class EmbeddingWorker : public Napi::AsyncWorker, public Napi::Promise::Deferred { public: EmbeddingWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess, - std::string text); + std::string text, common_params &params); protected: void Execute(); @@ -19,5 +19,6 @@ class EmbeddingWorker : public Napi::AsyncWorker, private: LlamaSessionPtr _sess; std::string _text; + common_params _params; EmbeddingResult _result; }; diff --git a/src/LlamaCompletionWorker.cpp b/src/LlamaCompletionWorker.cpp index 91abda6..2ff96d3 100644 --- a/src/LlamaCompletionWorker.cpp +++ b/src/LlamaCompletionWorker.cpp @@ -159,6 +159,22 @@ void LlamaCompletionWorker::OnOK() { Napi::Boolean::New(Napi::AsyncWorker::Env(), _result.truncated)); result.Set("text", Napi::String::New(Napi::AsyncWorker::Env(), _result.text.c_str())); + + auto ctx = _sess->context(); + const auto timings_token = llama_perf_context(ctx); + + auto timingsResult = Napi::Object::New(Napi::AsyncWorker::Env()); + timingsResult.Set("prompt_n", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.n_p_eval)); + timingsResult.Set("prompt_ms", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.t_p_eval_ms)); + timingsResult.Set("prompt_per_token_ms", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.t_p_eval_ms / timings_token.n_p_eval)); + timingsResult.Set("prompt_per_second", Napi::Number::New(Napi::AsyncWorker::Env(), 1e3 / timings_token.t_p_eval_ms * timings_token.n_p_eval)); + timingsResult.Set("predicted_n", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.n_eval)); + timingsResult.Set("predicted_ms", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.t_eval_ms)); + timingsResult.Set("predicted_per_token_ms", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.t_eval_ms / timings_token.n_eval)); + timingsResult.Set("predicted_per_second", Napi::Number::New(Napi::AsyncWorker::Env(), 1e3 / timings_token.t_eval_ms * timings_token.n_eval)); + + result.Set("timings",
timingsResult); + Napi::Promise::Deferred::Resolve(result); } diff --git a/src/LlamaContext.cpp b/src/LlamaContext.cpp index b456dd1..96b2e8d 100644 --- a/src/LlamaContext.cpp +++ b/src/LlamaContext.cpp @@ -25,6 +25,9 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) { {InstanceMethod<&LlamaContext::GetSystemInfo>( "getSystemInfo", static_cast(napi_enumerable)), + InstanceMethod<&LlamaContext::GetModelInfo>( + "getModelInfo", + static_cast(napi_enumerable)), InstanceMethod<&LlamaContext::GetFormattedChat>( "getFormattedChat", static_cast(napi_enumerable)), @@ -72,9 +75,23 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info) if (params.model.empty()) { Napi::TypeError::New(env, "Model is required").ThrowAsJavaScriptException(); } - params.embedding = get_option(options, "embedding", false); + + params.vocab_only = get_option(options, "vocab_only", false); + if (params.vocab_only) { + params.warmup = false; + } + params.n_ctx = get_option(options, "n_ctx", 512); params.n_batch = get_option(options, "n_batch", 2048); + params.embedding = get_option(options, "embedding", false); + if (params.embedding) { + // For non-causal models, batch size must be equal to ubatch size + params.n_ubatch = params.n_batch; + } + params.embd_normalize = get_option(options, "embd_normalize", 2); + int32_t pooling_type = get_option(options, "pooling_type", -1); + params.pooling_type = (enum llama_pooling_type) pooling_type; + params.cpuparams.n_threads = get_option(options, "n_threads", cpu_get_num_math() / 2); params.n_gpu_layers = get_option(options, "n_gpu_layers", -1); @@ -102,6 +119,44 @@ Napi::Value LlamaContext::GetSystemInfo(const Napi::CallbackInfo &info) { return Napi::String::New(info.Env(), _info); } +bool validateModelChatTemplate(const struct llama_model * model) { + std::vector model_template(2048, 0); // longest known template is about 1200 bytes + std::string template_key = "tokenizer.chat_template"; + int32_t res = llama_model_meta_val_str(model, 
template_key.c_str(), model_template.data(), model_template.size()); + if (res >= 0) { + llama_chat_message chat[] = {{"user", "test"}}; + std::string tmpl = std::string(model_template.data(), model_template.size()); + int32_t chat_res = llama_chat_apply_template(model, tmpl.c_str(), chat, 1, true, nullptr, 0); + return chat_res > 0; + } + return res > 0; +} + +// getModelInfo(): object +Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) { + char desc[1024]; + auto model = _sess->model(); + llama_model_desc(model, desc, sizeof(desc)); + + int count = llama_model_meta_count(model); + Napi::Object metadata = Napi::Object::New(info.Env()); + for (int i = 0; i < count; i++) { + char key[256]; + llama_model_meta_key_by_index(model, i, key, sizeof(key)); + char val[2048]; + llama_model_meta_val_str_by_index(model, i, val, sizeof(val)); + + metadata.Set(key, val); + } + Napi::Object details = Napi::Object::New(info.Env()); + details.Set("desc", desc); + details.Set("nParams", llama_model_n_params(model)); + details.Set("size", llama_model_size(model)); + details.Set("isChatTemplateSupported", validateModelChatTemplate(model)); + details.Set("metadata", metadata); + return details; +} + // getFormattedChat(messages: [{ role: string, content: string }]): string Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) { Napi::Env env = info.Env(); @@ -164,6 +219,12 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) { params.sampling.penalty_present = get_option(options, "penalty_present", 0.00f); params.sampling.typ_p = get_option(options, "typical_p", 1.00f); + params.sampling.xtc_threshold = get_option(options, "xtc_threshold", 0.00f); + params.sampling.xtc_probability = get_option(options, "xtc_probability", 0.10f); + params.sampling.dry_multiplier = get_option(options, "dry_multiplier", 1.75f); + params.sampling.dry_base = get_option(options, "dry_base", 2); + params.sampling.dry_allowed_length = 
get_option(options, "dry_allowed_length", -1); + params.sampling.dry_penalty_last_n = get_option(options, "dry_penalty_last_n", 0); params.sampling.ignore_eos = get_option(options, "ignore_eos", false); params.sampling.grammar = get_option(options, "grammar", ""); params.n_keep = get_option(options, "n_keep", 0); @@ -242,8 +303,16 @@ Napi::Value LlamaContext::Embedding(const Napi::CallbackInfo &info) { Napi::TypeError::New(env, "Context is disposed") .ThrowAsJavaScriptException(); } + auto options = Napi::Object::New(env); + if (info.Length() >= 2 && info[1].IsObject()) { + options = info[1].As(); + } + + common_params embdParams; + embdParams.embedding = true; + embdParams.embd_normalize = get_option(options, "embd_normalize", 2); auto text = info[0].ToString().Utf8Value(); - auto *worker = new EmbeddingWorker(info, _sess, text); + auto *worker = new EmbeddingWorker(info, _sess, text, embdParams); worker->Queue(); return worker->Promise(); } diff --git a/src/LlamaContext.h b/src/LlamaContext.h index f53f3ed..b0ef374 100644 --- a/src/LlamaContext.h +++ b/src/LlamaContext.h @@ -9,6 +9,7 @@ class LlamaContext : public Napi::ObjectWrap { private: Napi::Value GetSystemInfo(const Napi::CallbackInfo &info); + Napi::Value GetModelInfo(const Napi::CallbackInfo &info); Napi::Value GetFormattedChat(const Napi::CallbackInfo &info); Napi::Value Completion(const Napi::CallbackInfo &info); void StopCompletion(const Napi::CallbackInfo &info); @@ -20,6 +21,7 @@ class LlamaContext : public Napi::ObjectWrap { Napi::Value Release(const Napi::CallbackInfo &info); std::string _info; + Napi::Object _meta; LlamaSessionPtr _sess = nullptr; LlamaCompletionWorker *_wip = nullptr; }; diff --git a/src/llama.cpp b/src/llama.cpp index 716bd6d..0a11f8b 160000 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -1 +1 @@ -Subproject commit 716bd6dec3e044e5c325386b5b0483392b24cefe +Subproject commit 0a11f8b7b5c39fdf6e91ef9674bc68ff08681af7 diff --git a/test/__snapshots__/index.test.ts.snap 
b/test/__snapshots__/index.test.ts.snap index 9bb711f..e9329ff 100644 --- a/test/__snapshots__/index.test.ts.snap +++ b/test/__snapshots__/index.test.ts.snap @@ -3,390 +3,390 @@ exports[`embedding 1`] = ` { "embedding": Float32Array [ - -0.31675082445144653, - -0.43599677085876465, - 0.2842176556587219, - -0.3017965853214264, - 0.2392418384552002, - 0.04051382839679718, - 0.4100721478462219, - 0.10454083979129791, - 0.4090535044670105, - -0.06295584887266159, - -0.04712896794080734, - 0.2806476950645447, - 0.40491336584091187, - 0.10194163024425507, - 0.49026718735694885, - -0.009369755163788795, - -0.2465490698814392, - -0.22770199179649353, - -1.0458416938781738, - -0.2166871428489685, - 0.6557914614677429, - -0.7230453491210938, - 0.20650175213813782, - 0.12906213104724884, - -0.3065425157546997, - -0.05055123567581177, - 0.15843096375465393, - -0.2496279776096344, - -0.18578670918941498, - -0.8025084137916565, - -0.09939667582511902, - -0.4805593192577362, - 0.2869286835193634, - -0.02917742356657982, - 0.09968794882297516, - 0.638768196105957, - -0.18080230057239532, - 0.31582072377204895, - -0.16332626342773438, - -0.07381542026996613, - 0.02227707952260971, - -0.12259727716445923, - -0.4199213981628418, - -0.6439017653465271, - -0.9518581032752991, - -0.478310227394104, - -0.09106314927339554, - -0.10656579583883286, - 0.23740722239017487, - 0.17871806025505066, - -0.006209656596183777, - -0.24963046610355377, - 0.5452842712402344, - -0.4196273684501648, - -0.09732893109321594, - 0.5225005149841309, - 0.586061954498291, - -0.006697028875350952, - 0.04724699258804321, - -0.020389355719089508, - 0.28490161895751953, - 0.06631571054458618, - -1.347287893295288, - 0.7759774327278137, - 0.7334375381469727, - 0.2512473464012146, - 0.24622076749801636, - -0.3908783197402954, - 0.5118073225021362, - -0.03271361440420151, - -1.1665935516357422, - 0.060436684638261795, - 0.426295667886734, - 0.24330949783325195, - 0.09367141127586365, - 0.9547717571258545, - 
0.1025765910744667, - -0.21356996893882751, - 0.026526503264904022, - -0.06435937434434891, - 0.020274002104997635, - 0.00009178370237350464, - -0.27164608240127563, - 0.29389306902885437, - 0.32194212079048157, - -0.2177257090806961, - 0.1549997627735138, - -0.1569412350654602, - 0.6085831522941589, - -0.13000011444091797, - -0.4939142167568207, - -0.1584230661392212, - 0.014757245779037476, - 0.3467212915420532, - -0.8384324908256531, - -0.38149502873420715, - 0.761687695980072, - -0.5037468075752258, - 0.4623715877532959, - 3.027837038040161, - -0.8218261003494263, - 0.2564074993133545, - 0.4998604953289032, - 0.2681752145290375, - 0.3823198080062866, - -0.45944130420684814, - 0.49553239345550537, - -0.6649777889251709, - 0.7294898629188538, - 0.26804405450820923, - 0.04392679035663605, - -0.7067784070968628, - -0.11972713470458984, - -0.7598733901977539, - 0.10709381848573685, - 0.5217934250831604, - 0.47103384137153625, - -0.2886502742767334, - 0.2997066378593445, - 0.20894336700439453, - 0.26365214586257935, - 0.24776126444339752, - -0.3258114457130432, - -0.5293864607810974, - -0.8539069294929504, - -0.6136033535003662, - 1.0757741928100586, - 0.7267462015151978, - -0.27316009998321533, - -0.2561330795288086, - 0.18105855584144592, - -0.08825583755970001, - 0.19965484738349915, - 0.1672036498785019, - -0.28207316994667053, - -0.12543855607509613, - -0.14294040203094482, - 0.09278200566768646, - 0.040439583361148834, - -0.736630380153656, - -0.25634679198265076, - -1.9601731300354004, - -0.3310612142086029, - -0.5434229969978333, - 0.01788293570280075, - 0.032616131007671356, - 0.609567403793335, - 0.858144998550415, - -0.318093478679657, - -0.0796692967414856, - -0.7168588042259216, - 0.1110445111989975, - -0.567732572555542, - -0.01739158295094967, - 0.28283244371414185, - 0.3364527225494385, - 0.20470762252807617, - 0.19577538967132568, - 0.2783430814743042, - -0.11769482493400574, - -0.21024100482463837, - -0.3101402521133423, - 0.3356964588165283, - 
-0.23523595929145813, - 0.17819613218307495, - -0.9204550981521606, - 0.10227306187152863, - 0.5551596283912659, - -0.024233419448137283, - -0.1970871239900589, - 0.3630383610725403, - 0.23087607324123383, - -0.4215763211250305, - 0.22759608924388885, - 0.7607041001319885, - 0.11994702368974686, - -0.05169602483510971, - 0.09660237282514572, - -0.2939290404319763, - 0.4620421826839447, - 0.2110845148563385, - -0.28786763548851013, - 0.20992836356163025, - 0.2748706638813019, - 0.3591448962688446, - -0.05198436230421066, - 0.5347371101379395, - -0.31219103932380676, - 0.7624036073684692, - 0.49006152153015137, - 0.721939742565155, - -0.1544194519519806, - 0.2356635332107544, - 0.260989785194397, - -0.4341775178909302, - 0.2175033986568451, - 0.3311929702758789, - -0.658074676990509, - -0.0721643716096878, - -0.9849763512611389, - -0.5786190032958984, - -0.5338155627250671, - 0.19828137755393982, - -0.13761988282203674, - 0.5290082693099976, - 0.31761717796325684, - 0.30320850014686584, - -0.17363403737545013, - -0.12378804385662079, - -0.2294391393661499, - -0.30233079195022583, - 0.04657968878746033, - 0.5284978151321411, - -0.1821138858795166, - -0.3328201174736023, - -0.1596558690071106, - -0.07398349046707153, - 0.21585571765899658, - 0.44374558329582214, - 0.009042628109455109, - 0.3015800714492798, - -0.36871951818466187, - 0.02604338526725769, - -2.6554155349731445, - 0.4089074730873108, - -0.17871621251106262, - -0.24346095323562622, - -0.3486909866333008, - -0.42616716027259827, - -0.047970473766326904, - -0.43238213658332825, - 0.4014579951763153, - 0.025996878743171692, - -0.2683185338973999, - -0.33254432678222656, - 0.18910960853099823, - -0.3303394019603729, - 0.05121683329343796, - 0.17187441885471344, - 0.07126298546791077, - 0.27607858180999756, - 0.17330458760261536, - 0.25166964530944824, - -0.18320614099502563, - 0.289517879486084, - -0.03618074953556061, - -0.21957388520240784, - 0.11612068116664886, - -0.0572085827589035, - 1.4992469549179077, 
- 1.0933433771133423, - -0.4693331718444824, - -0.3510415554046631, - -0.08302801847457886, - -0.033570028841495514, - -0.29879435896873474, - -0.35438287258148193, - -0.30475133657455444, - 0.3831859230995178, - 0.38844913244247437, - -0.19874590635299683, - -0.13862036168575287, - -0.6022831797599792, - -0.3788949251174927, - 0.13613620400428772, - -0.18078544735908508, - -0.009425848722457886, - -0.7452380061149597, - -0.4157859683036804, - 0.4530068635940552, - 0.13743945956230164, - -0.3175085186958313, - 0.6526327133178711, - 0.24586793780326843, - -0.7100682854652405, - -0.16152799129486084, - 0.06487356871366501, - -0.1884164661169052, - -0.07302089780569077, - -0.1162581741809845, - 0.03140163794159889, - 0.023588567972183228, - 0.04369001090526581, - 0.26604440808296204, - 0.41999179124832153, - -0.2714700698852539, - -0.14512112736701965, - 0.14753544330596924, - 0.1594376266002655, - -0.29822126030921936, - -0.6152225136756897, - -0.0867367833852768, - -0.3691854774951935, - -0.6337385773658752, - 0.21633337438106537, - -0.3805257976055145, - -0.23840826749801636, - 0.18518060445785522, - 0.1698695421218872, - 0.4860868453979492, - -0.09626510739326477, - -0.5617294311523438, - 0.15259182453155518, - -0.31189143657684326, - -0.18293984234333038, - -0.02691025286912918, - 0.5463331937789917, - 0.06472191959619522, - 0.15975123643875122, - -0.21300852298736572, - 0.19371119141578674, - -0.01898816227912903, - -0.006421089172363281, - -0.5573108792304993, - 0.3597659766674042, - 0.4484986662864685, - -0.007104843854904175, - -0.16482914984226227, - -0.2485591173171997, - -2.284099578857422, - 0.14769689738750458, - -0.39700138568878174, - 0.7063485980033875, - -0.1826840341091156, - 0.590281069278717, - -0.10977043211460114, - 0.6633661389350891, - 0.05109164118766785, - -0.19233568012714386, - -0.04799129068851471, - 0.36748436093330383, - 0.5711069107055664, - 0.14837764203548431, - -0.4261610209941864, - 0.31497037410736084, - 0.5925230383872986, - 
-0.044471852481365204, - 0.0255507230758667, - -0.7241432070732117, - 0.4163000285625458, - 0.1902274489402771, - 1.7171918153762817, - -0.0398159958422184, - 0.01215837150812149, - -0.2655452787876129, - 0.2620833218097687, - 0.09907037019729614, - 0.21091532707214355, - 0.3541150689125061, - 0.09587915986776352, - 0.060591284185647964, - 0.17751094698905945, - -0.2133590281009674, - 0.015092112123966217, - 0.2828346788883209, - -0.152604341506958, - 0.2095182240009308, - 0.1438969522714615, - 0.14312972128391266, - -0.34975650906562805, - 0.42927607893943787, - -0.13484393060207367, - -0.6236618161201477, - 0.7973842024803162, - -0.00305243581533432, - -0.16345366835594177, - -0.0020597651600837708, - -0.40994253754615784, - -0.16733843088150024, - -0.490699827671051, - -0.0993179902434349, - 0.7848128080368042, - 0.34007522463798523, - -0.06727378070354462, - 0.36648187041282654, - -0.028181880712509155, - 0.8266128897666931, - 0.11815033853054047, - 0.20424866676330566, - 0.06183842569589615, - -0.07530013471841812, - 0.30032768845558167, - -0.09024043381214142, - 0.03212149441242218, + -0.03405065834522247, + -0.04686957970261574, + 0.03055335022509098, + -0.032443080097436905, + 0.025718456134200096, + 0.004355229903012514, + 0.04408268630504608, + 0.01123812235891819, + 0.043973181396722794, + -0.006767743267118931, + -0.005066356156021357, + 0.03016958013176918, + 0.04352811723947525, + 0.010958707891404629, + 0.05270364135503769, + -0.0010072471341118217, + -0.02650398388504982, + -0.024477925151586533, + -0.11242780834436417, + -0.023293830454349518, + 0.07049746811389923, + -0.07772725075483322, + 0.022198902443051338, + 0.013874157331883907, + -0.03295326977968216, + -0.005434249062091112, + 0.01703130081295967, + -0.026834964752197266, + -0.01997203938663006, + -0.08626952022314072, + -0.01068512536585331, + -0.051660045981407166, + 0.030844785273075104, + -0.0031365680042654276, + 0.010716437362134457, + 0.068667471408844, + -0.019436215981841087, + 
0.033950675278902054, + -0.017557546496391296, + -0.007935144938528538, + 0.0023947823792696, + -0.01317918673157692, + -0.04514148086309433, + -0.06921932846307755, + -0.10232458263635635, + -0.05141826719045639, + -0.009789273142814636, + -0.011455805040895939, + 0.025521235540509224, + 0.019212160259485245, + -0.000667537038680166, + -0.026835232973098755, + 0.0586179681122303, + -0.04510987177491188, + -0.01046284381300211, + 0.05616871640086174, + 0.06300155818462372, + -0.0007199294632300735, + 0.005079043563455343, + -0.0021918523125350475, + 0.030626876279711723, + 0.007128927856683731, + -0.14483322203159332, + 0.08341744542121887, + 0.07884441316127777, + 0.02700904756784439, + 0.026468690484762192, + -0.042019352316856384, + 0.05501919984817505, + -0.0035167080350220203, + -0.1254086047410965, + 0.006496933288872242, + 0.045826710760593414, + 0.026155728846788406, + 0.01006966084241867, + 0.10263780504465103, + 0.011026966385543346, + -0.02295873500406742, + 0.00285159470513463, + -0.006918621715158224, + 0.0021794517524540424, + 0.000009866732398222666, + -0.02920190803706646, + 0.031593456864356995, + 0.03460872173309326, + -0.02340547740459442, + 0.01666244864463806, + -0.01687115617096424, + 0.06542258709669113, + -0.01397499069571495, + -0.05309569463133812, + -0.01703045330941677, + 0.0015864013694226742, + 0.03727247938513756, + -0.09013134986162186, + -0.04101065173745155, + 0.08188129961490631, + -0.05415269732475281, + 0.04970486834645271, + 0.3254919648170471, + -0.08834616839885712, + 0.027563761919736862, + 0.0537349171936512, + 0.02882879041135311, + 0.04109931364655495, + -0.04938986152410507, + 0.0532696470618248, + -0.0714849978685379, + 0.07842003554105759, + 0.028814690187573433, + 0.004722122568637133, + -0.07597856223583221, + -0.012870647013187408, + -0.08168625831604004, + 0.011512567289173603, + 0.05609270557761192, + 0.05063605681061745, + -0.031029855832457542, + 0.032218411564826965, + 0.02246137708425522, + 
0.028342561796307564, + 0.026634294539690018, + -0.035024676471948624, + -0.05690895393490791, + -0.09179484844207764, + -0.06596225500106812, + 0.11564554274082184, + 0.07812509685754776, + -0.02936466410756111, + -0.02753426320850849, + 0.019463764503598213, + -0.009487487375736237, + 0.021462861448526382, + 0.017974363639950752, + -0.030322818085551262, + -0.013484623283147812, + -0.015366069041192532, + 0.009974050335586071, + 0.0043472484685480595, + -0.07918763905763626, + -0.027557237073779106, + -0.2107182741165161, + -0.03558902442455292, + -0.05841787904500961, + 0.0019224125426262617, + 0.0035062285605818033, + 0.06552839279174805, + 0.09225044399499893, + -0.03419499471783638, + -0.008564435876905918, + -0.0770621970295906, + 0.01193726621568203, + -0.06103115528821945, + -0.0018695922335609794, + 0.030404439195990562, + 0.03616860881447792, + 0.02200603485107422, + 0.02104582078754902, + 0.02992183342576027, + -0.012652173638343811, + -0.022600872442126274, + -0.03334002569317818, + 0.036087311804294586, + -0.025287825614213943, + 0.01915605366230011, + -0.09894876927137375, + 0.010994336567819118, + 0.05967956408858299, + -0.002605088520795107, + -0.02118683233857155, + 0.03902656212449074, + 0.024819139391183853, + -0.04531938210129738, + 0.024466540664434433, + 0.08177556097507477, + 0.012894284911453724, + -0.0055573140271008015, + 0.010384738445281982, + -0.0315973237156868, + 0.049669455736875534, + 0.022691549733281136, + -0.03094572201371193, + 0.02256726287305355, + 0.02954855002462864, + 0.038608014583587646, + -0.005588310305029154, + 0.05748414993286133, + -0.033560484647750854, + 0.08195825666189194, + 0.05268153175711632, + 0.07760839909315109, + -0.01660006493330002, + 0.025333790108561516, + 0.02805635705590248, + -0.046674009412527084, + 0.02338157780468464, + 0.03560318797826767, + -0.07074291259050369, + -0.007757657673209906, + -0.10588479042053223, + -0.06220144405961037, + -0.05738508328795433, + 0.021315215155482292, + 
-0.014794114045798779, + 0.0568682998418808, + 0.03414379432797432, + 0.03259486332535744, + -0.018665630370378494, + -0.013307193294167519, + -0.02466466836631298, + -0.032500509172677994, + 0.005007308442145586, + 0.056813426315784454, + -0.019577212631702423, + -0.03577810525894165, + -0.017162978649139404, + -0.007953212596476078, + 0.02320445328950882, + 0.047702573239803314, + 0.0009720809757709503, + 0.03241980820894241, + -0.039637286216020584, + 0.002799659501761198, + -0.28545671701431274, + 0.043957483023405075, + -0.019211962819099426, + -0.026172012090682983, + -0.037484221160411835, + -0.04581289738416672, + -0.0051568178460001945, + -0.04648100584745407, + 0.043156664818525314, + 0.002794660162180662, + -0.0288441963493824, + -0.03574845939874649, + 0.020329250022768974, + -0.035511430352926254, + 0.00550580071285367, + 0.018476471304893494, + 0.0076607586815953255, + 0.029678400605916977, + 0.018630214035511017, + 0.027054443955421448, + -0.019694628193974495, + 0.031123122200369835, + -0.003889424493536353, + -0.023604154586791992, + 0.012482953257858753, + -0.006149912718683481, + 0.16116879880428314, + 0.11753422766923904, + -0.05045323818922043, + -0.037736907601356506, + -0.00892549753189087, + -0.003608772298321128, + -0.032120343297719955, + -0.038096100091934204, + -0.03276071697473526, + 0.041192423552274704, + 0.0417582169175148, + -0.021365150809288025, + -0.014901665039360523, + -0.06474533677101135, + -0.04073113948106766, + 0.014634618535637856, + -0.019434405490756035, + -0.0010132770985364914, + -0.08011295646429062, + -0.04469691962003708, + 0.04869816079735756, + 0.014774718321859837, + -0.03413211181759834, + 0.07015790790319443, + 0.026430761441588402, + -0.0763322189450264, + -0.01736423186957836, + 0.006973897572606802, + -0.02025473862886429, + -0.007849734276533127, + -0.012497734278440475, + 0.003375670639798045, + 0.0025357671547681093, + 0.004696668591350317, + 0.028599727898836136, + 0.045149046927690506, + 
-0.02918298728764057, + -0.015600496903061867, + 0.01586003601551056, + 0.01713951863348484, + -0.03205873444676399, + -0.06613631546497345, + -0.009324189275503159, + -0.0396873764693737, + -0.06812679022550583, + 0.02325580082833767, + -0.04090645909309387, + -0.025628848001360893, + 0.01990688405930996, + 0.01826094649732113, + 0.05225425213575363, + -0.010348482988774776, + -0.060385819524526596, + 0.016403594985604286, + -0.03352827578783035, + -0.019666001200675964, + -0.002892847638577223, + 0.058730725198984146, + 0.006957595236599445, + 0.01717323064804077, + -0.02289837971329689, + 0.020823920145630836, + -0.002041224157437682, + -0.0006902660243213177, + -0.05991082638502121, + 0.03867478296160698, + 0.04821353033185005, + -0.000763769494369626, + -0.017719104886054993, + -0.026720063760876656, + -0.2455403208732605, + 0.015877392143011093, + -0.04267758131027222, + 0.07593235373497009, + -0.019638502970337868, + 0.06345511227846146, + -0.011800303123891354, + 0.07131174951791763, + 0.005492342635989189, + -0.020676052197813988, + -0.0051590558141469955, + 0.039504505693912506, + 0.061393894255161285, + 0.01595057174563408, + -0.04581223800778389, + 0.0338592603802681, + 0.06369612365961075, + -0.0047807167284190655, + 0.002746698446571827, + -0.07784527540206909, + 0.04475218430161476, + 0.02044941857457161, + 0.18459783494472504, + -0.004280212800949812, + 0.0013070228742435575, + -0.02854607254266739, + 0.028173912316560745, + 0.010650048032402992, + 0.022673361003398895, + 0.03806731104850769, + 0.010306993499398232, + 0.006513552740216255, + 0.01908239722251892, + -0.022936059162020683, + 0.0016223995480686426, + 0.030404679477214813, + -0.01640494167804718, + 0.022523174062371254, + 0.015468898229300976, + 0.015386420302093029, + -0.03759876638650894, + 0.04614710435271263, + -0.014495699666440487, + -0.06704354286193848, + 0.08571866899728775, + -0.0003281363460700959, + -0.01757124252617359, + -0.0002214244013885036, + -0.044068753719329834, + 
-0.01798885315656662, + -0.05275014787912369, + -0.010676667094230652, + 0.08436724543571472, + 0.03655802831053734, + -0.007231920026242733, + 0.0393967404961586, + -0.0030295473989099264, + 0.08886074274778366, + 0.012701141647994518, + 0.02195669710636139, + 0.006647620350122452, + -0.00809475127607584, + 0.032285176217556, + -0.009700831025838852, + 0.00345305516384542, ], } `; @@ -413,11 +413,94 @@ exports[`tokeneize 3`] = ` " `; -exports[`work fine 1`] = ` +exports[`works fine 1`] = ` { "text": " swochadoorter scientific WindowsCa occupiedrÃ¥ alta", + "timings": "Timings: (8) keys", "tokens_evaluated": 18, "tokens_predicted": 10, "truncated": false, } `; + +exports[`works fine with vocab_only: empty result 1`] = ` +{ + "text": "", + "timings": { + "predicted_ms": 0, + "predicted_n": 1, + "predicted_per_second": Infinity, + "predicted_per_token_ms": 0, + "prompt_ms": 0, + "prompt_n": 1, + "prompt_per_second": Infinity, + "prompt_per_token_ms": 0, + }, + "tokens_evaluated": 0, + "tokens_predicted": 0, + "truncated": false, +} +`; + +exports[`works fine with vocab_only: model info 1`] = ` +{ + "desc": "llama ?B all F32", + "isChatTemplateSupported": false, + "metadata": { + "general.architecture": "llama", + "general.file_type": "1", + "general.name": "LLaMA v2", + "llama.attention.head_count": "2", + "llama.attention.head_count_kv": "2", + "llama.attention.layer_norm_rms_epsilon": "0.000010", + "llama.block_count": "1", + "llama.context_length": "4096", + "llama.embedding_length": "8", + "llama.feed_forward_length": "32", + "llama.rope.dimension_count": "4", + "tokenizer.ggml.bos_token_id": "1", + "tokenizer.ggml.eos_token_id": "2", + "tokenizer.ggml.model": "llama", + "tokenizer.ggml.unknown_token_id": "0", + }, + "nParams": 513048, + "size": 1026144, +} +`; + +exports[`works fine with vocab_only: tokenize 1`] = ` +{ + "tokens": Int32Array [ + 9038, + 2501, + 263, + 931, + ], +} +`; + +exports[`works fine: model info 1`] = ` +{ + "desc": "llama ?B F16", + 
"isChatTemplateSupported": false, + "metadata": { + "general.architecture": "llama", + "general.file_type": "1", + "general.name": "LLaMA v2", + "llama.attention.head_count": "2", + "llama.attention.head_count_kv": "2", + "llama.attention.layer_norm_rms_epsilon": "0.000010", + "llama.block_count": "1", + "llama.context_length": "4096", + "llama.embedding_length": "8", + "llama.feed_forward_length": "32", + "llama.rope.dimension_count": "4", + "tokenizer.ggml.bos_token_id": "1", + "tokenizer.ggml.eos_token_id": "2", + "tokenizer.ggml.model": "llama", + "tokenizer.ggml.unknown_token_id": "0", + }, + "nParams": 513048, + "size": 1026144, +} +`; diff --git a/test/index.test.ts b/test/index.test.ts index e0fbeda..1dc9cf3 100644 --- a/test/index.test.ts +++ b/test/index.test.ts @@ -2,9 +2,11 @@ import path from 'path' import waitForExpect from 'wait-for-expect' import { loadModel } from '../lib' -it('work fine', async () => { +it('works fine', async () => { let tokens = '' const model = await loadModel({ model: path.resolve(__dirname, './tiny-random-llama.gguf') }) + const info = model.getModelInfo() + expect(info).toMatchSnapshot('model info') const result = await model.completion({ prompt: 'My name is Merve and my favorite', n_samples: 1, @@ -16,7 +18,10 @@ it('work fine', async () => { expect(data).toMatchObject({ token: expect.any(String) }) tokens += data.token }) - expect(result).toMatchSnapshot() + expect({ + ...result, + timings: `Timings: (${Object.keys(result.timings).length}) keys` + }).toMatchSnapshot() await waitForExpect(() => { expect(tokens).toBe(result.text) }) @@ -25,6 +30,13 @@ it('work fine', async () => { await model.release() }) +it('works fine with vocab_only', async () => { + const model = await loadModel({ model: path.resolve(__dirname, './tiny-random-llama.gguf'), vocab_only: true }) + expect(model.getModelInfo()).toMatchSnapshot('model info') + expect(await model.tokenize('Once upon a time')).toMatchSnapshot('tokenize') + expect(await 
model.completion({ prompt: 'Once upon a time' })).toMatchSnapshot('empty result') +}) + it('tokeneize', async () => { const model = await loadModel({ model: path.resolve(__dirname, './tiny-random-llama.gguf') }) {