
Commit

Merge branch 'main' into jhen-dev
jhen0409 committed Jan 2, 2025
2 parents b9ec7c8 + 5c91e0d commit ba19a63
Showing 14 changed files with 661 additions and 410 deletions.
@@ -1,6 +1,5 @@
name: Release build
name: Build release artifacts
on:
push:
workflow_dispatch:
inputs:
upload-artifacts:
@@ -43,16 +42,14 @@ jobs:
- name: Install dependencies
run: yarn install
- name: Prepare & build
env:
CMAKE_BUILD_PARALLEL_LEVEL: 4
run: |
bash ./scripts/prepare-linux.sh
bash ./scripts/build-linux.sh
- name: Upload build artifacts
if: inputs.upload-artifacts == 'true'
uses: actions/upload-artifact@v4
with:
name: bin-linux-arm64
name: bin-linux-x86_64
path: bin
retention-days: ${{ inputs.artifacts-retention-days }}

@@ -83,8 +80,6 @@ jobs:
with:
platforms: linux/arm64
- name: Prepare & build
env:
CMAKE_BUILD_PARALLEL_LEVEL: 4
run: |
docker run --rm \
-e CMAKE_BUILD_PARALLEL_LEVEL=${{ env.CMAKE_BUILD_PARALLEL_LEVEL }} \
@@ -127,14 +122,12 @@ jobs:
- name: Install dependencies
run: yarn install
- name: Build (macOS)
env:
CMAKE_BUILD_PARALLEL_LEVEL: 4
run: bash ./scripts/build-macos.sh
- name: Upload build artifacts
if: inputs.upload-artifacts == 'true'
uses: actions/upload-artifact@v4
with:
name: bin-macos
name: bin-${{ matrix.os }}
path: bin
retention-days: ${{ inputs.artifacts-retention-days }}

2 changes: 1 addition & 1 deletion .github/workflows/release.yml
@@ -7,7 +7,7 @@ on:

jobs:
build:
uses: ./.github/workflows/release-build.yml
uses: ./.github/workflows/build-release.yml
with:
upload-artifacts: true
artifacts-retention-days: 3
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -7,7 +7,7 @@ project (llama-node)
set(CMAKE_CXX_STANDARD 17)

execute_process(COMMAND
git apply ${CMAKE_CURRENT_SOURCE_DIR}/scripts/ggml-cpu-CMakeLists.txt.patch
git apply ${CMAKE_CURRENT_SOURCE_DIR}/scripts/llama.cpp.patch
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
)

30 changes: 29 additions & 1 deletion lib/binding.ts
@@ -8,12 +8,15 @@ export type ChatMessage = {
export type LlamaModelOptions = {
model: string
embedding?: boolean
embd_normalize?: number
pooling_type?: number
n_ctx?: number
n_batch?: number
n_threads?: number
n_gpu_layers?: number
use_mlock?: boolean
use_mmap?: boolean
vocab_only?: boolean
}

export type LlamaCompletionOptions = {
@@ -23,7 +26,21 @@ export type LlamaCompletionOptions = {
temperature?: number
top_k?: number
top_p?: number
repetition_penalty?: number
min_p?: number
mirostat?: number
mirostat_tau?: number
mirostat_eta?: number
penalty_last_n?: number
penalty_repeat?: number
penalty_freq?: number
penalty_present?: number
typ_p?: number
xtc_threshold?: number
xtc_probability?: number
dry_multiplier?: number
dry_base?: number
dry_allowed_length?: number
dry_penalty_last_n?: number
n_predict?: number
max_length?: number
max_tokens?: number
@@ -37,6 +54,16 @@ export type LlamaCompletionResult = {
tokens_predicted: number
tokens_evaluated: number
truncated: boolean
timings: {
prompt_n: number
prompt_ms: number
prompt_per_token_ms: number
prompt_per_second: number
predicted_n: number
predicted_ms: number
predicted_per_token_ms: number
predicted_per_second: number
}
}

export type LlamaCompletionToken = {
@@ -54,6 +81,7 @@ export type EmbeddingResult = {
export interface LlamaContext {
new (options: LlamaModelOptions): LlamaContext
getSystemInfo(): string
getModelInfo(): object
getFormattedChat(messages: ChatMessage[]): string
completion(options: LlamaCompletionOptions, callback?: (token: LlamaCompletionToken) => void): Promise<LlamaCompletionResult>
stopCompletion(): void
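The new sampler fields (min_p, mirostat_*, penalty_*, typ_p, xtc_*, dry_*) and the timings block above are plain extensions of the TypeScript typings, so they can be passed and read directly from JavaScript. A minimal usage sketch follows; names not visible in this diff (the loadModel import, the prompt field, the model path) are assumptions, not taken from the commit:

import { loadModel } from '@fugood/llama.node'

async function main() {
  // LlamaModelOptions: model is required, the rest are optional
  const ctx = await loadModel({ model: '/path/to/model.gguf', n_ctx: 2048 })

  // xtc_* and dry_* are among the newly exposed sampler options
  const result = await ctx.completion({
    prompt: 'Hello,',
    temperature: 0.7,
    xtc_probability: 0.1,
    dry_multiplier: 1.75,
  })

  // timings is new on LlamaCompletionResult
  console.log(result.text, result.timings.predicted_per_second)
}

main()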
2 changes: 1 addition & 1 deletion package.json
@@ -1,7 +1,7 @@
{
"name": "@fugood/llama.node",
"access": "public",
"version": "0.3.3",
"version": "0.3.4",
"description": "Llama.cpp for Node.js",
"main": "lib/index.js",
"scripts": {
37 changes: 37 additions & 0 deletions scripts/llama.cpp.patch
@@ -0,0 +1,37 @@
diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
index 683b90af..e1bf104c 100644
--- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
+++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
@@ -80,7 +80,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
message(STATUS "ARM detected")

if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
- message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
+ list(APPEND ARCH_FLAGS /arch:armv8.7)
else()
check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
index 1d2bd932..b5007c66 100644
--- a/src/llama.cpp/common/common.h
+++ b/src/llama.cpp/common/common.h
@@ -183,6 +183,7 @@ struct common_params_vocoder {
};

struct common_params {
+ bool vocab_only = false;
int32_t n_predict = -1; // new tokens to predict
int32_t n_ctx = 4096; // context size
int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
index 20be9291..1bedc55d 100644
--- a/src/llama.cpp/common/common.cpp
+++ b/src/llama.cpp/common/common.cpp
@@ -1017,6 +1017,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
if (params.n_gpu_layers != -1) {
mparams.n_gpu_layers = params.n_gpu_layers;
}
+ mparams.vocab_only = params.vocab_only;
mparams.rpc_servers = params.rpc_servers.c_str();
mparams.main_gpu = params.main_gpu;
mparams.split_mode = params.split_mode;
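The vocab_only flag this patch threads through common_params is what backs the new vocab_only option in LlamaModelOptions (see lib/binding.ts above); the LlamaContext constructor also disables warmup when it is set. A rough sketch of the intended use, reusing the assumed loadModel entry point from the earlier example:

// Open the model for its vocabulary/metadata only; warmup is skipped
const vocabCtx = await loadModel({ model: '/path/to/model.gguf', vocab_only: true })
console.log(vocabCtx.getSystemInfo())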
20 changes: 15 additions & 5 deletions src/EmbeddingWorker.cpp
@@ -2,8 +2,8 @@
#include "LlamaContext.h"

EmbeddingWorker::EmbeddingWorker(const Napi::CallbackInfo &info,
LlamaSessionPtr &sess, std::string text)
: AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text) {}
LlamaSessionPtr &sess, std::string text, common_params &params)
: AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text), _params(params) {}

void EmbeddingWorker::Execute() {
llama_kv_cache_clear(_sess->context());
@@ -14,20 +14,30 @@ void EmbeddingWorker::Execute() {
}
const int n_embd = llama_n_embd(_sess->model());
do {
auto ctx = _sess->context();
int ret =
llama_decode(_sess->context(),
llama_decode(ctx,
llama_batch_get_one(tokens.data(), tokens.size()));
if (ret < 0) {
SetError("Failed to inference, code: " + std::to_string(ret));
break;
}
const float *embd = llama_get_embeddings_seq(_sess->context(), 0);

float *embd;
const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
if (pooling_type == LLAMA_POOLING_TYPE_NONE) {
embd = llama_get_embeddings(ctx);
} else {
embd = llama_get_embeddings_seq(ctx, 0);
}
if (embd == nullptr) {
SetError("Failed to get embeddings");
break;
}
_result.embedding.resize(n_embd);
memcpy(_result.embedding.data(), embd, n_embd * sizeof(float));
std::vector<float> embedding(embd, embd + n_embd), out(embd, embd + n_embd);
common_embd_normalize(embedding.data(), out.data(), n_embd, _params.embd_normalize);
memcpy(_result.embedding.data(), out.data(), n_embd * sizeof(float));
} while (false);
}

3 changes: 2 additions & 1 deletion src/EmbeddingWorker.h
@@ -9,7 +9,7 @@ class EmbeddingWorker : public Napi::AsyncWorker,
public Napi::Promise::Deferred {
public:
EmbeddingWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
std::string text);
std::string text, common_params &params);

protected:
void Execute();
@@ -19,5 +19,6 @@ class EmbeddingWorker : public Napi::AsyncWorker,
private:
LlamaSessionPtr _sess;
std::string _text;
common_params _params;
EmbeddingResult _result;
};
16 changes: 16 additions & 0 deletions src/LlamaCompletionWorker.cpp
@@ -159,6 +159,22 @@ void LlamaCompletionWorker::OnOK() {
Napi::Boolean::New(Napi::AsyncWorker::Env(), _result.truncated));
result.Set("text",
Napi::String::New(Napi::AsyncWorker::Env(), _result.text.c_str()));

auto ctx = _sess->context();
const auto timings_token = llama_perf_context(ctx);

auto timingsResult = Napi::Object::New(Napi::AsyncWorker::Env());
timingsResult.Set("prompt_n", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.n_p_eval));
timingsResult.Set("prompt_ms", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.t_p_eval_ms));
timingsResult.Set("prompt_per_token_ms", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.t_p_eval_ms / timings_token.n_p_eval));
timingsResult.Set("prompt_per_second", Napi::Number::New(Napi::AsyncWorker::Env(), 1e3 / timings_token.t_p_eval_ms * timings_token.n_p_eval));
timingsResult.Set("predicted_n", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.n_eval));
timingsResult.Set("predicted_ms", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.t_eval_ms));
timingsResult.Set("predicted_per_token_ms", Napi::Number::New(Napi::AsyncWorker::Env(), timings_token.t_eval_ms / timings_token.n_eval));
timingsResult.Set("predicted_per_second", Napi::Number::New(Napi::AsyncWorker::Env(), 1e3 / timings_token.t_eval_ms * timings_token.n_eval));

result.Set("timings", timingsResult);

Napi::Promise::Deferred::Resolve(result);
}

73 changes: 71 additions & 2 deletions src/LlamaContext.cpp
@@ -25,6 +25,9 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
{InstanceMethod<&LlamaContext::GetSystemInfo>(
"getSystemInfo",
static_cast<napi_property_attributes>(napi_enumerable)),
InstanceMethod<&LlamaContext::GetModelInfo>(
"getModelInfo",
static_cast<napi_property_attributes>(napi_enumerable)),
InstanceMethod<&LlamaContext::GetFormattedChat>(
"getFormattedChat",
static_cast<napi_property_attributes>(napi_enumerable)),
@@ -72,9 +75,23 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
if (params.model.empty()) {
Napi::TypeError::New(env, "Model is required").ThrowAsJavaScriptException();
}
params.embedding = get_option<bool>(options, "embedding", false);

params.vocab_only = get_option<bool>(options, "vocab_only", false);
if (params.vocab_only) {
params.warmup = false;
}

params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
params.embedding = get_option<bool>(options, "embedding", false);
if (params.embedding) {
// For non-causal models, batch size must be equal to ubatch size
params.n_ubatch = params.n_batch;
}
params.embd_normalize = get_option<int32_t>(options, "embd_normalize", 2);
int32_t pooling_type = get_option<int32_t>(options, "pooling_type", -1);
params.pooling_type = (enum llama_pooling_type) pooling_type;

params.cpuparams.n_threads =
get_option<int32_t>(options, "n_threads", cpu_get_num_math() / 2);
params.n_gpu_layers = get_option<int32_t>(options, "n_gpu_layers", -1);
@@ -102,6 +119,44 @@ Napi::Value LlamaContext::GetSystemInfo(const Napi::CallbackInfo &info) {
return Napi::String::New(info.Env(), _info);
}

bool validateModelChatTemplate(const struct llama_model * model) {
std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
std::string template_key = "tokenizer.chat_template";
int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
if (res >= 0) {
llama_chat_message chat[] = {{"user", "test"}};
std::string tmpl = std::string(model_template.data(), model_template.size());
int32_t chat_res = llama_chat_apply_template(model, tmpl.c_str(), chat, 1, true, nullptr, 0);
return chat_res > 0;
}
return res > 0;
}

// getModelInfo(): object
Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
char desc[1024];
auto model = _sess->model();
llama_model_desc(model, desc, sizeof(desc));

int count = llama_model_meta_count(model);
Napi::Object metadata = Napi::Object::New(info.Env());
for (int i = 0; i < count; i++) {
char key[256];
llama_model_meta_key_by_index(model, i, key, sizeof(key));
char val[2048];
llama_model_meta_val_str_by_index(model, i, val, sizeof(val));

metadata.Set(key, val);
}
Napi::Object details = Napi::Object::New(info.Env());
details.Set("desc", desc);
details.Set("nParams", llama_model_n_params(model));
details.Set("size", llama_model_size(model));
details.Set("isChatTemplateSupported", validateModelChatTemplate(model));
details.Set("metadata", metadata);
return details;
}

// getFormattedChat(messages: [{ role: string, content: string }]): string
Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
Napi::Env env = info.Env();
@@ -164,6 +219,12 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
params.sampling.penalty_present =
get_option<float>(options, "penalty_present", 0.00f);
params.sampling.typ_p = get_option<float>(options, "typical_p", 1.00f);
params.sampling.xtc_threshold = get_option<float>(options, "xtc_threshold", 0.00f);
params.sampling.xtc_probability = get_option<float>(options, "xtc_probability", 0.10f);
params.sampling.dry_multiplier = get_option<float>(options, "dry_multiplier", 1.75f);
params.sampling.dry_base = get_option<float>(options, "dry_base", 2);
params.sampling.dry_allowed_length = get_option<float>(options, "dry_allowed_length", -1);
params.sampling.dry_penalty_last_n = get_option<float>(options, "dry_penalty_last_n", 0);
params.sampling.ignore_eos = get_option<bool>(options, "ignore_eos", false);
params.sampling.grammar = get_option<std::string>(options, "grammar", "");
params.n_keep = get_option<int32_t>(options, "n_keep", 0);
@@ -242,8 +303,16 @@ Napi::Value LlamaContext::Embedding(const Napi::CallbackInfo &info) {
Napi::TypeError::New(env, "Context is disposed")
.ThrowAsJavaScriptException();
}
auto options = Napi::Object::New(env);
if (info.Length() >= 2 && info[1].IsObject()) {
options = info[1].As<Napi::Object>();
}

common_params embdParams;
embdParams.embedding = true;
embdParams.embd_normalize = get_option<int32_t>(options, "embd_normalize", 2);
auto text = info[0].ToString().Utf8Value();
auto *worker = new EmbeddingWorker(info, _sess, text);
auto *worker = new EmbeddingWorker(info, _sess, text, embdParams);
worker->Queue();
return worker->Promise();
}
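On the JavaScript side, the changes in this file surface as the new getModelInfo() method and an optional second argument to embedding() that carries embd_normalize (default 2, which is the Euclidean norm in llama.cpp's common_embd_normalize). A hedged sketch, reusing the assumed ctx object from the earlier examples; the embedding() call shape is inferred from the Embedding() handler above, and the context is assumed to have been created with embedding: true:

// Model metadata exposed by the new getModelInfo()
const info = ctx.getModelInfo()
console.log(info.desc, info.nParams, info.size, info.isChatTemplateSupported)

// Embeddings with explicit normalization (2 = Euclidean, matching the new default)
const { embedding } = await ctx.embedding('some text', { embd_normalize: 2 })
console.log(embedding.length)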
2 changes: 2 additions & 0 deletions src/LlamaContext.h
@@ -9,6 +9,7 @@ class LlamaContext : public Napi::ObjectWrap<LlamaContext> {

private:
Napi::Value GetSystemInfo(const Napi::CallbackInfo &info);
Napi::Value GetModelInfo(const Napi::CallbackInfo &info);
Napi::Value GetFormattedChat(const Napi::CallbackInfo &info);
Napi::Value Completion(const Napi::CallbackInfo &info);
void StopCompletion(const Napi::CallbackInfo &info);
@@ -20,6 +21,7 @@ class LlamaContext : public Napi::ObjectWrap<LlamaContext> {
Napi::Value Release(const Napi::CallbackInfo &info);

std::string _info;
Napi::Object _meta;
LlamaSessionPtr _sess = nullptr;
LlamaCompletionWorker *_wip = nullptr;
};
2 changes: 1 addition & 1 deletion src/llama.cpp
Submodule llama.cpp updated 89 files
+0 −81 .devops/cpu.Dockerfile
+0 −94 .devops/cuda.Dockerfile
+33 −0 .devops/full-cuda.Dockerfile
+33 −0 .devops/full-musa.Dockerfile
+50 −0 .devops/full-rocm.Dockerfile
+38 −0 .devops/full.Dockerfile
+0 −91 .devops/intel.Dockerfile
+38 −0 .devops/llama-cli-cuda.Dockerfile
+28 −0 .devops/llama-cli-intel.Dockerfile
+38 −0 .devops/llama-cli-musa.Dockerfile
+45 −0 .devops/llama-cli-rocm.Dockerfile
+27 −0 .devops/llama-cli-vulkan.Dockerfile
+29 −0 .devops/llama-cli.Dockerfile
+43 −0 .devops/llama-server-cuda.Dockerfile
+34 −0 .devops/llama-server-intel.Dockerfile
+43 −0 .devops/llama-server-musa.Dockerfile
+54 −0 .devops/llama-server-rocm.Dockerfile
+31 −0 .devops/llama-server-vulkan.Dockerfile
+33 −0 .devops/llama-server.Dockerfile
+0 −108 .devops/musa.Dockerfile
+0 −113 .devops/rocm.Dockerfile
+0 −88 .devops/vulkan.Dockerfile
+28 −76 .github/workflows/docker.yml
+1 −210 convert_hf_to_gguf.py
+0 −2 convert_hf_to_gguf_update.py
+1 −1 examples/cvector-generator/mean.hpp
+1 −1 examples/cvector-generator/pca.hpp
+3 −3 examples/export-lora/export-lora.cpp
+1 −3 examples/llama.android/llama/src/main/cpp/llama-android.cpp
+0 −12 examples/rpc/rpc-server.cpp
+0 −2 examples/run/README.md
+38 −73 examples/run/run.cpp
+0 −1 examples/server/CMakeLists.txt
+1 −4 examples/server/README.md
+ examples/server/public/index.html.gz
+17 −38 examples/server/server.cpp
+0 −3 examples/server/tests/unit/test_chat_completion.py
+3 −38 examples/server/tests/unit/test_completion.py
+0 −41 examples/server/tests/unit/test_embedding.py
+6 −46 examples/server/utils.hpp
+4 −19 examples/server/webui/src/main.js
+0 −1 ggml/src/CMakeLists.txt
+49 −74 ggml/src/ggml-backend-reg.cpp
+21 −32 ggml/src/ggml-cpu/CMakeLists.txt
+71 −51 ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
+6 −6 ggml/src/ggml-cpu/ggml-cpu.c
+258 −264 ggml/src/ggml-cpu/llamafile/sgemm.cpp
+2 −2 ggml/src/ggml-cpu/llamafile/sgemm.h
+3 −5 ggml/src/ggml-sycl/common.cpp
+0 −4 ggml/src/ggml-sycl/common.hpp
+20 −26 ggml/src/ggml-sycl/ggml-sycl.cpp
+92 −164 ggml/src/ggml-vulkan/ggml-vulkan.cpp
+2 −2 ggml/src/ggml-vulkan/vulkan-shaders/acc.comp
+1 −1 ggml/src/ggml-vulkan/vulkan-shaders/add.comp
+2 −2 ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp
+3 −3 ggml/src/ggml-vulkan/vulkan-shaders/concat.comp
+4 −4 ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp
+2 −2 ggml/src/ggml-vulkan/vulkan-shaders/copy.comp
+2 −2 ggml/src/ggml-vulkan/vulkan-shaders/cos.comp
+25 −45 ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp
+1 −1 ggml/src/ggml-vulkan/vulkan-shaders/div.comp
+1 −5 ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp
+1 −4 ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp
+3 −3 ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp
+22 −47 ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp
+1 −1 ggml/src/ggml-vulkan/vulkan-shaders/mul.comp
+71 −53 ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp
+0 −33 ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp
+74 −76 ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp
+59 −62 ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp
+90 −92 ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp
+120 −122 ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp
+69 −71 ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp
+1 −1 ggml/src/ggml-vulkan/vulkan-shaders/pad.comp
+1 −1 ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp
+1 −1 ggml/src/ggml-vulkan/vulkan-shaders/scale.comp
+2 −2 ggml/src/ggml-vulkan/vulkan-shaders/sin.comp
+2 −2 ggml/src/ggml-vulkan/vulkan-shaders/square.comp
+2 −2 ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp
+1 −2 ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
+0 −26 gguf-py/gguf/constants.py
+0 −1 gguf-py/gguf/tensor_mapping.py
+10 −12 scripts/compare-llama-bench.py
+1 −1 scripts/hf.sh
+1 −1 src/llama-vocab.cpp
+1 −1 src/llama-vocab.h
+2 −298 src/llama.cpp
+1 −13 tests/test-backend-ops.cpp
+0 −4 tests/test-chat-template.cpp