-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: implement vocab_only mode for context
- Loading branch information
Showing
7 changed files
with
107 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt | ||
index 683b90af..e1bf104c 100644 | ||
--- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt | ||
+++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt | ||
@@ -80,7 +80,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name) | ||
message(STATUS "ARM detected") | ||
|
||
if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang") | ||
- message(FATAL_ERROR "MSVC is not supported for ARM, use clang") | ||
+ list(APPEND ARCH_FLAGS /arch:armv8.7) | ||
else() | ||
check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E) | ||
if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "") | ||
diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h | ||
index 1d2bd932..b5007c66 100644 | ||
--- a/src/llama.cpp/common/common.h | ||
+++ b/src/llama.cpp/common/common.h | ||
@@ -183,6 +183,7 @@ struct common_params_vocoder { | ||
}; | ||
|
||
struct common_params { | ||
+ bool vocab_only = false; | ||
int32_t n_predict = -1; // new tokens to predict | ||
int32_t n_ctx = 4096; // context size | ||
int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS) | ||
diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp | ||
index 20be9291..1bedc55d 100644 | ||
--- a/src/llama.cpp/common/common.cpp | ||
+++ b/src/llama.cpp/common/common.cpp | ||
@@ -1017,6 +1017,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) { | ||
if (params.n_gpu_layers != -1) { | ||
mparams.n_gpu_layers = params.n_gpu_layers; | ||
} | ||
+ mparams.vocab_only = params.vocab_only; | ||
mparams.rpc_servers = params.rpc_servers.c_str(); | ||
mparams.main_gpu = params.main_gpu; | ||
mparams.split_mode = params.split_mode; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters