From 2df373ac40ea581ccca8a58c713f03ad9d4b658d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Tue, 25 Jun 2024 01:22:33 +0200 Subject: [PATCH 01/15] CUDA: fix matrix multiplication algorithm choice (#8102) --- ggml-cuda.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 2dda039242531..0acfda91d3e51 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -1924,16 +1924,16 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor } else if (!split && any_gpus_with_slow_fp16 && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && src1->ne[1] == 1) { // FP32 precision KQV single-batch for batch size 1 without FlashAttention ggml_cuda_mul_mat_vec_nc(ctx, src0, src1, dst); + } else if (!split && src0->type == GGML_TYPE_F16 && (src1->type == GGML_TYPE_F16 || !any_gpus_with_slow_fp16) + && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) { + // KQ + KQV multi-batch without FlashAttention + ggml_cuda_mul_mat_batched_cublas(ctx, src0, src1, dst); } else if (use_dequantize_mul_mat_vec) { ggml_cuda_op_mul_mat(ctx, src0, src1, dst, ggml_cuda_op_dequantize_mul_mat_vec, nullptr); } else if (use_mul_mat_vec_q) { ggml_cuda_op_mul_mat(ctx, src0, src1, dst, ggml_cuda_op_mul_mat_vec_q, quantize_row_q8_1_cuda); } else if (use_mul_mat_q) { ggml_cuda_op_mul_mat(ctx, src0, src1, dst, ggml_cuda_op_mul_mat_q, quantize_mmq_q8_1_cuda); - } else if (!split && src0->type == GGML_TYPE_F16 && (src1->type == GGML_TYPE_F16 || !any_gpus_with_slow_fp16) - && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) { - // KQ + KQV multi-batch without FlashAttention - ggml_cuda_mul_mat_batched_cublas(ctx, src0, src1, dst); } else { ggml_cuda_op_mul_mat(ctx, src0, src1, dst, ggml_cuda_op_mul_mat_cublas, nullptr); } From 083bacce14c1aaf9976aa40e8266cdc25ac749d3 Mon Sep 17 00:00:00 2001 From: "Meng, Hengyu" Date: Tue, 25 Jun 2024 10:19:20 +0800 Subject: [PATCH 02/15] [SYCL] Re-enabled mul_mat_batched_sycl (#8095) --- ggml-sycl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index e5ddf4a346c36..db045336f1edb 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -4620,7 +4620,7 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor } else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && src1->ne[1] == 1) { // KQV single-batch ggml_sycl_mul_mat_vec_nc(ctx, src0, src1, dst); - } else if (!split && src0->type == GGML_TYPE_F16 && (src1->type == GGML_TYPE_F16) && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) { + } else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) { // KQ + KQV multi-batch ggml_sycl_mul_mat_batched_sycl(ctx, src0, src1, dst); } else if (use_dequantize_mul_mat_vec) { From f702a90e245499283d6de0b287701c723cda2a87 Mon Sep 17 00:00:00 2001 From: HatsuneMikuUwU33 <173229399+HatsuneMikuUwU33@users.noreply.github.com> Date: Tue, 25 Jun 2024 10:44:48 +0200 Subject: [PATCH 03/15] Update control vector help (#8104) --- common/common.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 1dc53265134a7..0ca7b4430f765 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1538,9 +1538,11 @@ void 
gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param options.push_back({ "*", " --lora FNAME", "apply LoRA adapter (implies --no-mmap)" }); options.push_back({ "*", " --lora-scaled FNAME S", "apply LoRA adapter with user defined scaling S (implies --no-mmap)" }); options.push_back({ "*", " --lora-base FNAME", "optional model to use as a base for the layers modified by the LoRA adapter" }); - options.push_back({ "*", " --control-vector FNAME", "add a control vector" }); + options.push_back({ "*", " --control-vector FNAME", "add a control vector\n" + "note: this argument can be repeated to add multiple control vectors" }); options.push_back({ "*", " --control-vector-scaled FNAME SCALE", - "add a control vector with user defined scaling SCALE" }); + "add a control vector with user defined scaling SCALE\n" + "note: this argument can be repeated to add multiple scaled control vectors" }); options.push_back({ "*", " --control-vector-layer-range START END", "layer range to apply the control vector(s) to, start and end inclusive" }); options.push_back({ "*", "-m, --model FNAME", "model path (default: models/$filename with filename from --hf-file\n" From 3791ad219323389106dc3fd80814eb5bbb7b80de Mon Sep 17 00:00:00 2001 From: HanishKVC Date: Tue, 25 Jun 2024 16:57:35 +0530 Subject: [PATCH 04/15] SimpleChat v3.1: Boolean chat request options in Settings UI, cache_prompt (#7950) * SimpleChat: Allow for chat req bool options to be user controlled * SimpleChat: Allow user to control cache_prompt flag in request * SimpleChat: Add sample GUI images to readme file Show the chat screen and the settings screen * SimpleChat:Readme: Add quickstart block, title to image, cleanup * SimpleChat: RePosition contents of the Info and Settings UI Make it more logically structured and flow through. * SimpleChat: Rename to apiRequestOptions from chatRequestOptions So that it is not wrongly assumed that these request options are used only for chat/completions endpoint. Rather these are used for both the end points, so rename to match semantic better. * SimpleChat: Update image included with readme wrt settings ui * SimpleChat:ReadMe: Switch to webp screen image to reduce size --- examples/server/public_simplechat/readme.md | 37 +++++--- .../server/public_simplechat/simplechat.js | 79 ++++++++++-------- .../public_simplechat/simplechat_screens.webp | Bin 0 -> 21376 bytes 3 files changed, 68 insertions(+), 48 deletions(-) create mode 100644 examples/server/public_simplechat/simplechat_screens.webp diff --git a/examples/server/public_simplechat/readme.md b/examples/server/public_simplechat/readme.md index 2dc1778255256..21410199f6016 100644 --- a/examples/server/public_simplechat/readme.md +++ b/examples/server/public_simplechat/readme.md @@ -3,6 +3,13 @@ by Humans for All. +## quickstart + +To run from the build dir + +bin/llama-server -m path/model.gguf --path ../examples/server/public_simplechat + +Continue reading for the details. ## overview @@ -14,6 +21,8 @@ own system prompts. This allows seeing the generated text / ai-model response in oneshot at the end, after it is fully generated, or potentially as it is being generated, in a streamed manner from the server/ai-model. +![Chat and Settings screens](./simplechat_screens.webp "Chat and Settings screens") + Auto saves the chat session locally as and when the chat is progressing and inturn at a later time when you open SimpleChat, option is provided to restore the old chat session, if a matching one exists. 
@@ -170,17 +179,23 @@ It is attached to the document object. Some of these can also be updated using t The histogram/freq based trimming logic is currently tuned for english language wrt its is-it-a-alpabetic|numeral-char regex match logic. - chatRequestOptions - maintains the list of options/fields to send along with chat request, + apiRequestOptions - maintains the list of options/fields to send along with api request, irrespective of whether /chat/completions or /completions endpoint. If you want to add additional options/fields to send to the server/ai-model, and or modify the existing options value or remove them, for now you can update this global var using browser's development-tools/console. - For string and numeric fields in chatRequestOptions, including even those added by a user - at runtime by directly modifying gMe.chatRequestOptions, setting ui entries will be auto + For string, numeric and boolean fields in apiRequestOptions, including even those added by a + user at runtime by directly modifying gMe.apiRequestOptions, setting ui entries will be auto created. + cache_prompt option supported by example/server is allowed to be controlled by user, so that + any caching supported wrt system-prompt and chat history, if usable can get used. When chat + history sliding window is enabled, cache_prompt logic may or may not kick in at the backend + wrt same, based on aspects related to model, positional encoding, attention mechanism etal. + However system prompt should ideally get the benefit of caching. + headers - maintains the list of http headers sent when request is made to the server. By default Content-Type is set to application/json. Additionally Authorization entry is provided, which can be set if needed using the settings ui. @@ -197,10 +212,10 @@ It is attached to the document object. Some of these can also be updated using t >0 : Send the latest chat history from the latest system prompt, limited to specified cnt. -By using gMe's iRecentUserMsgCnt and chatRequestOptions.max_tokens one can try to control the -implications of loading of the ai-model's context window by chat history, wrt chat response to -some extent in a simple crude way. You may also want to control the context size enabled when -the server loads ai-model, on the server end. +By using gMe's iRecentUserMsgCnt and apiRequestOptions.max_tokens/n_predict one can try to control +the implications of loading of the ai-model's context window by chat history, wrt chat response to +some extent in a simple crude way. You may also want to control the context size enabled when the +server loads ai-model, on the server end. Sometimes the browser may be stuborn with caching of the file, so your updates to html/css/js @@ -237,12 +252,12 @@ also be started with a model context size of 1k or more, to be on safe side. internal n_predict, for now add the same here on the client side, maybe later add max_tokens to /completions endpoint handling code on server side. -NOTE: One may want to experiment with frequency/presence penalty fields in chatRequestOptions -wrt the set of fields sent to server along with the user query. To check how the model behaves +NOTE: One may want to experiment with frequency/presence penalty fields in apiRequestOptions +wrt the set of fields sent to server along with the user query, to check how the model behaves wrt repeatations in general in the generated text response. A end-user can change these behaviour by editing gMe from browser's devel-tool/console or by -using the providing settings ui. 
+using the provided settings ui (for settings exposed through the ui). ### OpenAi / Equivalent API WebService @@ -253,7 +268,7 @@ for a minimal chatting experimentation by setting the below. * the baseUrl in settings ui * https://api.openai.com/v1 or similar -* Wrt request body - gMe.chatRequestOptions +* Wrt request body - gMe.apiRequestOptions * model (settings ui) * any additional fields if required in future diff --git a/examples/server/public_simplechat/simplechat.js b/examples/server/public_simplechat/simplechat.js index 25afb25649139..8e0df3b61df2b 100644 --- a/examples/server/public_simplechat/simplechat.js +++ b/examples/server/public_simplechat/simplechat.js @@ -222,8 +222,8 @@ class SimpleChat { * @param {Object} obj */ request_jsonstr_extend(obj) { - for(let k in gMe.chatRequestOptions) { - obj[k] = gMe.chatRequestOptions[k]; + for(let k in gMe.apiRequestOptions) { + obj[k] = gMe.apiRequestOptions[k]; } if (gMe.bStream) { obj["stream"] = true; @@ -740,11 +740,12 @@ class Me { "Authorization": "", // Authorization: Bearer OPENAI_API_KEY } // Add needed fields wrt json object to be sent wrt LLM web services completions endpoint. - this.chatRequestOptions = { + this.apiRequestOptions = { "model": "gpt-3.5-turbo", "temperature": 0.7, "max_tokens": 1024, "n_predict": 1024, + "cache_prompt": false, //"frequency_penalty": 1.2, //"presence_penalty": 1.2, }; @@ -800,51 +801,55 @@ class Me { ui.el_create_append_p(`bStream:${this.bStream}`, elDiv); - ui.el_create_append_p(`bCompletionFreshChatAlways:${this.bCompletionFreshChatAlways}`, elDiv); - - ui.el_create_append_p(`bCompletionInsertStandardRolePrefix:${this.bCompletionInsertStandardRolePrefix}`, elDiv); - ui.el_create_append_p(`bTrimGarbage:${this.bTrimGarbage}`, elDiv); + ui.el_create_append_p(`ApiEndPoint:${this.apiEP}`, elDiv); + ui.el_create_append_p(`iRecentUserMsgCnt:${this.iRecentUserMsgCnt}`, elDiv); - ui.el_create_append_p(`ApiEndPoint:${this.apiEP}`, elDiv); + ui.el_create_append_p(`bCompletionFreshChatAlways:${this.bCompletionFreshChatAlways}`, elDiv); + + ui.el_create_append_p(`bCompletionInsertStandardRolePrefix:${this.bCompletionInsertStandardRolePrefix}`, elDiv); } - ui.el_create_append_p(`chatRequestOptions:${JSON.stringify(this.chatRequestOptions, null, " - ")}`, elDiv); + ui.el_create_append_p(`apiRequestOptions:${JSON.stringify(this.apiRequestOptions, null, " - ")}`, elDiv); ui.el_create_append_p(`headers:${JSON.stringify(this.headers, null, " - ")}`, elDiv); } /** - * Auto create ui input elements for fields in ChatRequestOptions + * Auto create ui input elements for fields in apiRequestOptions * Currently supports text and number field types. 
* @param {HTMLDivElement} elDiv */ - show_settings_chatrequestoptions(elDiv) { + show_settings_apirequestoptions(elDiv) { let typeDict = { "string": "text", "number": "number", }; let fs = document.createElement("fieldset"); let legend = document.createElement("legend"); - legend.innerText = "ChatRequestOptions"; + legend.innerText = "ApiRequestOptions"; fs.appendChild(legend); elDiv.appendChild(fs); - for(const k in this.chatRequestOptions) { - let val = this.chatRequestOptions[k]; + for(const k in this.apiRequestOptions) { + let val = this.apiRequestOptions[k]; let type = typeof(val); - if (!((type == "string") || (type == "number"))) { - continue; + if (((type == "string") || (type == "number"))) { + let inp = ui.el_creatediv_input(`Set${k}`, k, typeDict[type], this.apiRequestOptions[k], (val)=>{ + if (type == "number") { + val = Number(val); + } + this.apiRequestOptions[k] = val; + }); + fs.appendChild(inp.div); + } else if (type == "boolean") { + let bbtn = ui.el_creatediv_boolbutton(`Set{k}`, k, {true: "true", false: "false"}, val, (userVal)=>{ + this.apiRequestOptions[k] = userVal; + }); + fs.appendChild(bbtn.div); } - let inp = ui.el_creatediv_input(`Set${k}`, k, typeDict[type], this.chatRequestOptions[k], (val)=>{ - if (type == "number") { - val = Number(val); - } - this.chatRequestOptions[k] = val; - }); - fs.appendChild(inp.div); } } @@ -870,32 +875,32 @@ class Me { }); elDiv.appendChild(bb.div); - bb = ui.el_creatediv_boolbutton("SetCompletionFreshChatAlways", "CompletionFreshChatAlways", {true: "[+] yes fresh", false: "[-] no, with history"}, this.bCompletionFreshChatAlways, (val)=>{ - this.bCompletionFreshChatAlways = val; + bb = ui.el_creatediv_boolbutton("SetTrimGarbage", "TrimGarbage", {true: "[+] yes trim", false: "[-] dont trim"}, this.bTrimGarbage, (val)=>{ + this.bTrimGarbage = val; }); elDiv.appendChild(bb.div); - bb = ui.el_creatediv_boolbutton("SetCompletionInsertStandardRolePrefix", "CompletionInsertStandardRolePrefix", {true: "[+] yes insert", false: "[-] dont insert"}, this.bCompletionInsertStandardRolePrefix, (val)=>{ - this.bCompletionInsertStandardRolePrefix = val; - }); - elDiv.appendChild(bb.div); + this.show_settings_apirequestoptions(elDiv); - bb = ui.el_creatediv_boolbutton("SetTrimGarbage", "TrimGarbage", {true: "[+] yes trim", false: "[-] dont trim"}, this.bTrimGarbage, (val)=>{ - this.bTrimGarbage = val; + let sel = ui.el_creatediv_select("SetApiEP", "ApiEndPoint", ApiEP.Type, this.apiEP, (val)=>{ + this.apiEP = ApiEP.Type[val]; }); - elDiv.appendChild(bb.div); + elDiv.appendChild(sel.div); - let sel = ui.el_creatediv_select("SetChatHistoryInCtxt", "ChatHistoryInCtxt", this.sRecentUserMsgCnt, this.iRecentUserMsgCnt, (val)=>{ + sel = ui.el_creatediv_select("SetChatHistoryInCtxt", "ChatHistoryInCtxt", this.sRecentUserMsgCnt, this.iRecentUserMsgCnt, (val)=>{ this.iRecentUserMsgCnt = this.sRecentUserMsgCnt[val]; }); elDiv.appendChild(sel.div); - sel = ui.el_creatediv_select("SetApiEP", "ApiEndPoint", ApiEP.Type, this.apiEP, (val)=>{ - this.apiEP = ApiEP.Type[val]; + bb = ui.el_creatediv_boolbutton("SetCompletionFreshChatAlways", "CompletionFreshChatAlways", {true: "[+] yes fresh", false: "[-] no, with history"}, this.bCompletionFreshChatAlways, (val)=>{ + this.bCompletionFreshChatAlways = val; }); - elDiv.appendChild(sel.div); + elDiv.appendChild(bb.div); - this.show_settings_chatrequestoptions(elDiv); + bb = ui.el_creatediv_boolbutton("SetCompletionInsertStandardRolePrefix", "CompletionInsertStandardRolePrefix", {true: "[+] yes insert", false: "[-] dont 
insert"}, this.bCompletionInsertStandardRolePrefix, (val)=>{ + this.bCompletionInsertStandardRolePrefix = val; + }); + elDiv.appendChild(bb.div); } diff --git a/examples/server/public_simplechat/simplechat_screens.webp b/examples/server/public_simplechat/simplechat_screens.webp new file mode 100644 index 0000000000000000000000000000000000000000..ccea44396051686b97220b0f5b6b9beb63706114 GIT binary patch literal 21376 zcmd?PW3wZp9Y2a^I$( z`ybnv!*kR3*>B&I-jxrnZ_KaKpWDZuzusBi*59*V?CbL1-(S`~`R#x3-3Rs8&iD6s z7VoWDagpp<-8MBF*nLj*{|y1v<5(wIC}OAKR>4IbvQjH1y}Hh*X9BwRy)()L8~%Dp zF{O>buw4#_CQXDck%u>S&Nft^j(|meh;+5ovb+6kc`qiYDOW`Gt+SQz((DUwLzB`C zV~fG)TXIeQSAJC+@vXB7S!e1wD2;3mw$T&bcKea~v5Lu$D)2RgF=08th|IRLU+0ww zz@VVS?-6aoBBrmb9CQQtZq*MZx0fC`<_=0ns02Ev7s7$LtEZq@_E6O|nWW_8ZJSrs zg3{9$c^z|!VfloLU{edq*-d{eUvYII6fsz0VDVYbm@&`ti5kC4y zR{UNnpk_UTISkI|NGOh%q5?A8s%;6FWQ6=7TDl*5Tz~p>h=rTNE%3l-k<6W*?Tfkzi>mzheD$E8>NExS+Vnk*wn9pxb$Oz zHBi)bO;el*Qd4cVbOkZFZ868j{Y&2}nx1`Ot}qJyH?o%+&7UK=X-}9wcd|~;XMCPc zPb~yTzyL3yvXi2Fb9&PzZ#^7yOW8jcbF<4q(J;Up?ka1yST&WybH)~A7e*0YRD^TY zJ-zZFS0+$p9>>2*CMJe0z$x*Hk>Njeaeb?`1X8081kle}z9EIlrfAdvLoZ|`R_eXfw0jdbD%W7OQCJIQfG9Xg}Auy@8weBDoXp#R`%p?mniFLf$EgXG{bkiG&)Xp;5v_xh zbx0I+M_}H^E?|b1u!D;U7UzwOK0O6Gm`R`6T6hv6cTdPu$sh)m;OiJgEw<5#jq>N!?uL;e?(lY2xoP zsVNcfeDwkiE7dYuW{(D+1v@uQ53@yeKFW{lU~qGMM***sAdLf!sj0X1v2dY<2-9eF zxZ-Y>=x>X!VayQEV7U+)gxp&`maiBw$&?ufOS=I;(#zDT0?o0ywe+o65>aiN08-xx zd3NSF_{W*DYcdL*gJ^5oJI@SKdH`U9JTmOq!QIi>ztsO3pESo)R+`azkAh;))tdGlu(q znD()4IS%y77Y5~V)+e@x8l!iCCVs|&@rD!8qCauC+PY*TL0GC!ue;V(uneAa2V%MM zKrB*s{ITvtR#sAX7YRsn5e~s~o%#L9PNO-%tOdjZ$utIWq1907GHhucfg^smXWvjovbh#=&j0&YJ<_k079oD$ZL@LOR|W2o*HXbEGVs* z4J+2jDI{iJIn4>r1ScW0%$*1xv#W1r=N|s0DUX`5E#gQ%WFo-;{42Nv%P#K-D@j+| zsK;&N>#Ne<@CLuJ$Cudl|p!9j9Me{H{bfG zCNEPdsg<$0Nl)mvf*&UCunN2{0!=%<%eBLxv6Ygx^&T%XL_O?}=Lb_xNzz<*aQ3*? 
zTPvVmmyOuS1Q;hlQc&kudmHgtLh;=fFfk#3w|LR;(e)#FuT8EGtWjk8 zivIYUTSdb*3WMo4*n!GQo~uXFdw>#2xg_vY$OXgKS5Y0_&uYN7(5ng3w(tMBBOho1 z&YBXX{K6Z10cFJ^?LJHOHpox6Ir+=+)MApE7cgtidF{OBa^fTK?f_iqMNeQC@q!xw zRY3sHNb`N!JwZH3w~(C^AYpSGGtIsxN^BWV0H7undGL)uIaD|2YrDzVsN=daCpXJlwTZTX zNWZz|{eiU3z0KAV)q3E8U}HvwD^LDW90#DccJ$Wo zn!c)k%vR7G;NdTjUI+Eq8WXKj>I|eo@qXZECoI$xa*qp&-#}p9lI-k!fa+?(%>`8!E!jhBrJS*-xGdyWF#Z2i@;HRzV~1D)ajmN zfnYXDD-uW2Y}ox^bkl5mir!2o{QK-e9}#H~>~gEkw1-q=c{qdvgfWj|_y@WEFOOIH&+oVYt(yzej?bjMoC+ec!5HHrw9RFljd!ck zGoD`s&vi7I1d$zT7j2=g(Df~D+||A)Bprqhsk{CySsNCfnjFaUJ??k?4s$f*6n0}K z@~0mRZ*6fULGnuo@qv# zmE_i~5mG(udTF{0u>8fYDeb%Iy zUS0}EfsSXrm=F)0MdAn3Ki^s4bRMhBUk);i5zt^rF{g61+E^xz^1|*)`v#zU^G&Q& zd$<*4AZ8dE7^9?V5fVetwVe(Q=D7KlamB8Z3P%AS%#+@Br;u|QXAubslFW`RgyBlV zv~=_A?slkg1c&0jO;jeH%}fXkKWou0fRQy+402t40+Xgvmj0boA99DTnhlqf%i<5k z+LWLYq^VO3o}~<bT1wWU&y$=cCGe%hc5upW!`uRoplZ0C zC?+G-e#NBUJ;y&Az3PZ_tE_@CCrnvayo>Cl`Zl#qVa2Uru9;DWao9i9=(LoBg$(xQ zso&_>i&gFC)G2lkf=r)2yk-@nf?moW30ut5Pw|mSIQZ_~m13T8F_>~VR!RbTDgJ*tp$P9gS9?)0Y<83P$*ziXTZqnR*I!L&-UHeW~#WV)I z7y+GyTF;78X^zEDEjwBK6}0q!e*R_A74^tgOE*9&xRGtBbWoFSM;8~$+s7v=?}ePHE48=Jct5Bm(& ze~s6v@;8G1bdQ~ASQ2s2(P;PP3{V9|!o$ z2zPU^>0LNoO=TCBe<{`HMMh)37Pwa~Y-I|l0Pz8|OzO6QdktRWY{I!kr0}*7T_NU} znZQOVKd>V_zyUd2`OQL+xRb&%Js{NbX<`ihHtma7M-OFsCtdXe*OQfsW-LLPIB_0v zhv@;9Or0jGK|pl*4YFfSQSe9f`EG5!b`@i&S8rzQd5nT&CV%+K^6xi>lDOjS8Dl^R zQwD=S?0jl#s!P44QSe9f`EF^Fwx&rcrk8;@q@K>OQBr3(Jz?uM<@UQX)SZvv^Gjr+ z!pm;Q2_|}uR@F`{TiWM_wQnGiQ)+hxr0plMv-zarS`!%0s3AlfTo|(5MZ~U2oAU|m z7yN+aFX4XrKff}t5w7$R_5ms$WU>~d9%S9%A0Qoe3bL)NA1RX^TU2m$jVJS96JGtZ z9|9SR4!2bnv%~^mSaAMEIi{S4h9nTY3!7ZmqL{66cpOLLml6E!wL%jZO5_E)?~?#-F`jAyfRSE4+|{`{I-xS4)m!%{g(d3- zk9?YQz()YHedI8vOQ`4dNdNYj5ZFf(@PbmWY69$`e^}er8#%*6_k8RzG}*i`S+?3I znW9l|EG7+9f0Um_vQm3D=Tq0 z;yz$;K3v+7g(IKnwevSE4w&_sVXAm2yTRaJUh&w{r=cGz|CampKAKgAOi$C#32J(uqBXtXmUlNU@>6;y;IcE@57@ zN6-9=TS{v_;bt_QUD?Ca zRkJ|SeX`#=D4LpbdsWC=A!l4n+K4(FGDI2Wtm!l&XhUEvOc07ouj`}qhuk<1Vbnja zcAv+-LoY5j;ksY^OFDIy6oT2;l*cr$dr6*yW^5!hkO}-2U{UGYH;tRa0ks%l3oSni zgj8key5+t* zzQ_JZyNMQFpMD&hjF8YadFCv0dP;KJZ%z*|l`GEYgWZlmxRVQ7A%k^!vadm1(dr>V zVHoLEiFjZThHKws`_qmpF@R+DQAwt;q*S_JR9bJvkMCW3*wS~#i3P;~CiMADayMSC z`^U4LVhXH6?2P`uARg8Ao?Z?k%r&(MAU(82+ZPylJZ~8`xvu^!&icB>hvYt#vxBKb zov!A}BGso_fCuUwLt>e|b4oIim>E;*aLh__^bc1e4y$b`Ate^p*CiUsMl`N98}1+N z%8A)NHn-dZR-?ccL4aB0=|qdRh9YpLlhYf>)F?Yb&kQ=wjgisb-7S4bNr2EIUyLWp zC0-f40OxVz$K=3lzCR8rIShGK9pq~t#77Y~3wkwYrVBPuFvMYOzLR<{9=~kz=_-*2 zte%C)iAF~3_rMDokFk<(B7o7>335RWKqN0$2F<6Cj_czvZI5!sto?TBLsJWZ_JYOb zzw)qd4T7MVLUi+N0lgobrfD4|`~OU<{So;TG-?owke<;2QYh7(buiSZY>%nJUeGiS z9sgN4r$_TJvXHvkv6Z~q;yOm#untqMOqn7pZ{Hl;6#Du6f6VfakCQP?je-C86}T%b zD2pXB2eC=0ICU=FP&w#(r~~WQ4RCxaI|7b2ncGr(8Lshe@HV9&NQmIu=0miGDfEw) z!{UyAJZ?JwHvy>B_Cx}xN(U8#+)of`nkG*g=0;Dkb0ZPFjz}z@39qN&HnYoZEGerV z*8PX2AW9Fw?|Hd@2~GFW#~n0GmliggPU}YNdz8QQkiwao)1uW9p`Vqe{{8f}tc=cX z)PaF``_xs!2st6%_o4gjl=wL}mRuniREBsZ%(yO-)%k)3Tym!z6juc`6` zfv+O}C^-;-b6=ih6{-SkljO%c9+Y?n`6z~VI%gP}mS?aX|E5YX9gA(TaLGvIpUF%3 F008Q#p-=z- literal 0 HcmV?d00001 From 48e6b92cc378c937e59719f2c0f482bf76c9ca81 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Tue, 25 Jun 2024 13:56:49 +0200 Subject: [PATCH 05/15] Add chat template support for llama-cli (#8068) * add chat template support for llama-cli * add help message * server: simplify format_chat * more consistent naming * improve * add llama_chat_format_example * fix server * code style * code style * Update examples/main/main.cpp Co-authored-by: Georgi Gerganov --------- Co-authored-by: Georgi Gerganov --- common/common.cpp | 60 +++++++++++++++++++++++++++++++++++- 
common/common.h | 23 ++++++++++++++ examples/main/main.cpp | 55 +++++++++++++++++++++++++-------- examples/server/server.cpp | 12 ++------ examples/server/utils.hpp | 29 +++-------------- llama.cpp | 4 +-- tests/test-chat-template.cpp | 20 ++++++++++++ 7 files changed, 154 insertions(+), 49 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 0ca7b4430f765..da6db4dc6a09c 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1444,7 +1444,10 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param options.push_back({ "main", " --cfg-negative-prompt-file FNAME", "negative prompt file to use for guidance" }); options.push_back({ "main", " --cfg-scale N", "strength of guidance (default: %.1f, 1.0 = disable)", (double)sparams.cfg_scale }); - + options.push_back({ "main", " --chat-template JINJA_TEMPLATE", + "set custom jinja chat template (default: template taken from model's metadata)\n" + "only commonly used templates are accepted:\n" + "https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template" }); options.push_back({ "grammar" }); options.push_back({ "*", " --grammar GRAMMAR", "BNF-like grammar to constrain generations (see samples in grammars/ dir) (default: '%s')", sparams.grammar.c_str() }); options.push_back({ "*", " --grammar-file FNAME", "file to read grammar from" }); @@ -2604,12 +2607,67 @@ bool llama_should_add_bos_token(const llama_model * model) { return add_bos != -1 ? bool(add_bos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM); } +// +// Chat template utils +// + bool llama_chat_verify_template(const std::string & tmpl) { llama_chat_message chat[] = {{"user", "test"}}; int res = llama_chat_apply_template(nullptr, tmpl.c_str(), chat, 1, true, nullptr, 0); return res >= 0; } +std::string llama_chat_apply_template(const struct llama_model * model, + const std::string & tmpl, + const std::vector & msgs, + bool add_ass) { + int alloc_size = 0; + std::vector chat; + for (auto & msg : msgs) { + chat.push_back({msg.role.c_str(), msg.content.c_str()}); + alloc_size += (msg.role.size() + msg.content.size()) * 1.25; + } + + const char * ptr_tmpl = tmpl.empty() ? 
nullptr : tmpl.c_str(); + std::vector buf(alloc_size); + + // run the first time to get the total output length + int32_t res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), add_ass, buf.data(), buf.size()); + + // if it turns out that our buffer is too small, we resize it + if ((size_t) res > buf.size()) { + buf.resize(res); + res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), add_ass, buf.data(), buf.size()); + } + + std::string formatted_chat(buf.data(), res); + return formatted_chat; +} + +std::string llama_chat_format_single(const struct llama_model * model, + const std::string & tmpl, + const std::vector & past_msg, + const llama_chat_msg & new_msg, + bool add_ass) { + auto fmt_past_msg = llama_chat_apply_template(model, tmpl, past_msg, false); + std::vector chat_new(past_msg); + chat_new.push_back(new_msg); + auto fmt_new_msg = llama_chat_apply_template(model, tmpl, chat_new, add_ass); + auto formatted = fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size()); + return formatted; +} + +std::string llama_chat_format_example(const struct llama_model * model, + const std::string & tmpl) { + std::vector msgs = { + {"system", "You are a helpful assistant"}, + {"user", "Hello"}, + {"assistant", "Hi there"}, + {"user", "How are you?"}, + }; + return llama_chat_apply_template(model, tmpl, msgs, true); +} + // // KV cache utils // diff --git a/common/common.h b/common/common.h index a5c738f8b643f..de90eec5113f7 100644 --- a/common/common.h +++ b/common/common.h @@ -365,9 +365,32 @@ bool llama_should_add_bos_token(const llama_model * model); // Chat template utils // +// same with llama_chat_message, but uses std::string +struct llama_chat_msg { + std::string role; + std::string content; +}; + // Check if the template supplied via "--chat-template" is supported or not. 
Returns true if it's valid bool llama_chat_verify_template(const std::string & tmpl); +// CPP wrapper for llama_chat_apply_template +std::string llama_chat_apply_template(const struct llama_model * model, + const std::string & tmpl, + const std::vector & chat, + bool add_ass); + +// Format single message, while taking into account the position of that message in chat history +std::string llama_chat_format_single(const struct llama_model * model, + const std::string & tmpl, + const std::vector & past_msg, + const llama_chat_msg & new_msg, + bool add_ass); + +// Returns an example of formatted chat +std::string llama_chat_format_example(const struct llama_model * model, + const std::string & tmpl); + // // KV cache utils // diff --git a/examples/main/main.cpp b/examples/main/main.cpp index b97b7b7937f02..cfaf6a6e8ba4a 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -39,12 +39,12 @@ static std::ostringstream * g_output_ss; static std::vector * g_output_tokens; static bool is_interacting = false; -static bool file_exists(const std::string &path) { +static bool file_exists(const std::string & path) { std::ifstream f(path.c_str()); return f.good(); } -static bool file_is_empty(const std::string &path) { +static bool file_is_empty(const std::string & path) { std::ifstream f; f.exceptions(std::ifstream::failbit | std::ifstream::badbit); f.open(path.c_str(), std::ios::in | std::ios::binary | std::ios::ate); @@ -117,6 +117,14 @@ static void llama_log_callback_logTee(ggml_log_level level, const char * text, v LOG_TEE("%s", text); } +static std::string chat_add_and_format(struct llama_model * model, std::vector & chat_msgs, std::string role, std::string content) { + llama_chat_msg new_msg{role, content}; + auto formatted = llama_chat_format_single( + model, g_params->chat_template, chat_msgs, new_msg, role == "user"); + chat_msgs.push_back({role, content}); + return formatted; +} + int main(int argc, char ** argv) { gpt_params params; g_params = ¶ms; @@ -190,6 +198,7 @@ int main(int argc, char ** argv) { llama_model * model; llama_context * ctx; llama_context * ctx_guidance = NULL; + std::vector chat_msgs; g_model = &model; g_ctx = &ctx; @@ -215,6 +224,8 @@ int main(int argc, char ** argv) { __func__, n_ctx_train, n_ctx); } + LOG_TEE("%s: chat template example: %s\n", __func__, llama_chat_format_example(model, params.chat_template).c_str()); + // print system information { LOG_TEE("\n"); @@ -249,16 +260,21 @@ int main(int argc, char ** argv) { std::vector embd_inp; - if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) { - LOG("tokenize the prompt\n"); - embd_inp = ::llama_tokenize(ctx, params.prompt, true, true); - } else { - LOG("use session tokens\n"); - embd_inp = session_tokens; - } + { + auto prompt = params.conversation + ? 
chat_add_and_format(model, chat_msgs, "system", params.prompt) // format the system prompt in conversation mode + : params.prompt; + if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) { + LOG("tokenize the prompt\n"); + embd_inp = ::llama_tokenize(ctx, prompt, true, true); + } else { + LOG("use session tokens\n"); + embd_inp = session_tokens; + } - LOG("prompt: \"%s\"\n", log_tostr(params.prompt)); - LOG("tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, embd_inp).c_str()); + LOG("prompt: \"%s\"\n", log_tostr(prompt)); + LOG("tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, embd_inp).c_str()); + } // Should not run without any tokens if (embd_inp.empty()) { @@ -478,6 +494,7 @@ int main(int argc, char ** argv) { std::vector input_tokens; g_input_tokens = &input_tokens; std::vector output_tokens; g_output_tokens = &output_tokens; std::ostringstream output_ss; g_output_ss = &output_ss; + std::ostringstream assistant_ss; // for storing current assistant message, used in conversation mode // the first thing we will do is to output the prompt, so set color accordingly console::set_display(console::prompt); @@ -793,11 +810,18 @@ int main(int argc, char ** argv) { is_antiprompt = true; } + chat_add_and_format(model, chat_msgs, "system", assistant_ss.str()); is_interacting = true; printf("\n"); } } + // if current token is not EOG, we add it to current assistant message + if (params.conversation) { + auto id = llama_sampling_last(ctx_sampling); + assistant_ss << llama_token_to_piece(ctx, id, false); + } + if (n_past > 0 && is_interacting) { LOG("waiting for user input\n"); @@ -848,8 +872,12 @@ int main(int argc, char ** argv) { string_process_escapes(buffer); } + std::string user_inp = params.conversation + ? chat_add_and_format(model, chat_msgs, "user", std::move(buffer)) + : std::move(buffer); + // TODO: one inconvenient of current chat template implementation is that we can't distinguish between user input and special tokens (prefix/postfix) const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true); - const auto line_inp = ::llama_tokenize(ctx, buffer, false, false); + const auto line_inp = ::llama_tokenize(ctx, user_inp, false, params.conversation); const auto line_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true); LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str()); @@ -864,6 +892,9 @@ int main(int argc, char ** argv) { output_ss << llama_token_to_piece(ctx, token); } + // reset assistant message + assistant_ss.str(""); + n_remain -= line_inp.size(); LOG("n_remain: %d\n", n_remain); } else { diff --git a/examples/server/server.cpp b/examples/server/server.cpp index f9a86961f9c8e..ae768097baa0e 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2606,17 +2606,9 @@ int main(int argc, char ** argv) { // print sample chat example to make it clear which template is used { - json chat; - chat.push_back({{"role", "system"}, {"content", "You are a helpful assistant"}}); - chat.push_back({{"role", "user"}, {"content", "Hello"}}); - chat.push_back({{"role", "assistant"}, {"content", "Hi there"}}); - chat.push_back({{"role", "user"}, {"content", "How are you?"}}); - - const std::string chat_example = format_chat(ctx_server.model, params.chat_template, chat); - LOG_INFO("chat template", { - {"chat_example", chat_example}, - {"built_in", params.chat_template.empty()}, + {"chat_example", llama_chat_format_example(ctx_server.model, params.chat_template)}, + {"built_in", params.chat_template.empty()}, }); } diff 
--git a/examples/server/utils.hpp b/examples/server/utils.hpp index 63fde9c9faabe..7ef2a519a10c7 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -118,36 +118,17 @@ static inline void server_log(const char * level, const char * function, int lin // Format given chat. If tmpl is empty, we take the template from model metadata inline std::string format_chat(const struct llama_model * model, const std::string & tmpl, const std::vector & messages) { - size_t alloc_size = 0; - // vector holding all allocated string to be passed to llama_chat_apply_template - std::vector str(messages.size() * 2); - std::vector chat(messages.size()); + std::vector chat; for (size_t i = 0; i < messages.size(); ++i) { const auto & curr_msg = messages[i]; - str[i*2 + 0] = json_value(curr_msg, "role", std::string("")); - str[i*2 + 1] = json_value(curr_msg, "content", std::string("")); - alloc_size += str[i*2 + 1].length(); - chat[i].role = str[i*2 + 0].c_str(); - chat[i].content = str[i*2 + 1].c_str(); + std::string role = json_value(curr_msg, "role", std::string("")); + std::string content = json_value(curr_msg, "content", std::string("")); + chat.push_back({role, content}); } - const char * ptr_tmpl = tmpl.empty() ? nullptr : tmpl.c_str(); - std::vector buf(alloc_size * 2); - - // run the first time to get the total output length - int32_t res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size()); - - // if it turns out that our buffer is too small, we resize it - if ((size_t) res > buf.size()) { - buf.resize(res); - res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size()); - } - - const std::string formatted_chat(buf.data(), res); - + auto formatted_chat = llama_chat_apply_template(model, tmpl, chat, true); LOG_VERBOSE("formatted_chat", {{"text", formatted_chat.c_str()}}); - return formatted_chat; } diff --git a/llama.cpp b/llama.cpp index 49bc93c028a2a..33e6cb7229aab 100644 --- a/llama.cpp +++ b/llama.cpp @@ -18818,10 +18818,10 @@ static int32_t llama_chat_apply_template_internal( if (add_ass) { ss << "<|im_start|>assistant\n"; } - } else if (tmpl == "llama2" || tmpl.find("[INST]") != std::string::npos) { + } else if (tmpl == "llama2" || tmpl == "mistral" || tmpl.find("[INST]") != std::string::npos) { // llama2 template and its variants // [variant] support system message - bool support_system_message = tmpl.find("<>") != std::string::npos; + bool support_system_message = tmpl.find("<>") != std::string::npos || tmpl == "mistral"; // [variant] space before + after response bool space_around_response = tmpl.find("' ' + eos_token") != std::string::npos; // [variant] add BOS inside history diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp index cef9a650bdfdf..d19ba8633e8c2 100644 --- a/tests/test-chat-template.cpp +++ b/tests/test-chat-template.cpp @@ -7,6 +7,7 @@ #include #include "llama.h" +#include "common.h" int main(void) { llama_chat_message conversation[] = { @@ -119,5 +120,24 @@ int main(void) { std::cout << output << "\n-------------------------\n"; assert(output == expected); } + + // test llama_chat_format_single + std::cout << "\n\n=== llama_chat_format_single ===\n\n"; + std::vector chat2; + chat2.push_back({"system", "You are a helpful assistant"}); + chat2.push_back({"user", "Hello"}); + chat2.push_back({"assistant", "I am assistant"}); + llama_chat_msg new_msg{"user", "How are you"}; + + auto fmt_single = [&](std::string tmpl) { + auto output = 
llama_chat_format_single(nullptr, tmpl, chat2, new_msg, true); + std::cout << "fmt_single(" << tmpl << ")\n" << output << "\n-------------------------\n"; + return output; + }; + assert(fmt_single("chatml") == "<|im_start|>user\nHow are you<|im_end|>\n<|im_start|>assistant\n"); + assert(fmt_single("llama2") == "[INST] How are you [/INST]"); + assert(fmt_single("gemma") == "user\nHow are you\nmodel\n"); + assert(fmt_single("llama3") == "<|start_header_id|>user<|end_header_id|>\n\nHow are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"); + return 0; } From 49c03c79cda17913b72260acdc8157b742cee41c Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Tue, 25 Jun 2024 13:59:54 +0200 Subject: [PATCH 06/15] cvector: better prompt handling, add "mean vector" method (#8069) * remove completions file * fix inverted vector * add mean method * code style * remove inverted pca hotfix --- common/common.cpp | 22 +++--- common/common.h | 17 +++-- examples/cvector-generator/README.md | 17 ++++- .../cvector-generator/cvector-generator.cpp | 74 ++++++++++--------- examples/cvector-generator/mean.hpp | 48 ++++++++++++ examples/cvector-generator/negative.txt | 5 +- examples/cvector-generator/pca.hpp | 5 +- examples/cvector-generator/positive.txt | 5 +- 8 files changed, 133 insertions(+), 60 deletions(-) create mode 100644 examples/cvector-generator/mean.hpp diff --git a/common/common.cpp b/common/common.cpp index da6db4dc6a09c..c76d0e2c33be5 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1263,11 +1263,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa return true; } // cvector params - if (arg == "--completions-file") { - CHECK_ARG - params.cvector_completions_file = argv[i]; - return true; - } if (arg == "--positive-file") { CHECK_ARG params.cvector_positive_file = argv[i]; @@ -1278,11 +1273,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa params.cvector_negative_file = argv[i]; return true; } - if (arg == "--completions") { - CHECK_ARG - params.n_completions = std::stoi(argv[i]); - return true; - } if (arg == "--pca-batch") { CHECK_ARG params.n_pca_batch = std::stoi(argv[i]); @@ -1293,6 +1283,14 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa params.n_pca_iterations = std::stoi(argv[i]); return true; } + if (arg == "--method") { + CHECK_ARG + std::string value(argv[i]); + /**/ if (value == "pca") { params.cvector_dimre_method = DIMRE_METHOD_PCA; } + else if (value == "mean") { params.cvector_dimre_method = DIMRE_METHOD_MEAN; } + else { invalid_param = true; } + return true; + } #ifndef LOG_DISABLE_LOGS // Parse args for logging parameters if (log_param_single_parse(argv[i])) { @@ -1626,11 +1624,9 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param options.push_back({ "cvector", "-o, --output FNAME", "output file (default: '%s')", params.cvector_outfile.c_str() }); options.push_back({ "cvector", " --positive-file FNAME", "positive prompts file, one prompt per line (default: '%s')", params.cvector_positive_file.c_str() }); options.push_back({ "cvector", " --negative-file FNAME", "negative prompts file, one prompt per line (default: '%s')", params.cvector_negative_file.c_str() }); - options.push_back({ "cvector", " --completions-file FNAME", - "completions file (default: '%s')", params.cvector_completions_file.c_str() }); - options.push_back({ "cvector", " --completions N", "number of lines of completions file to use (default: %d)", 
params.n_completions }); options.push_back({ "cvector", " --pca-batch N", "batch size used for PCA. Larger batch runs faster, but uses more memory (default: %d)", params.n_pca_batch }); options.push_back({ "cvector", " --pca-iter N", "number of iterations used for PCA (default: %d)", params.n_pca_iterations }); + options.push_back({ "cvector", " --method {pca,mean}", "dimensionality reduction method to be used (default: pca)" }); printf("usage: %s [options]\n", argv[0]); diff --git a/common/common.h b/common/common.h index de90eec5113f7..c541204f6743b 100644 --- a/common/common.h +++ b/common/common.h @@ -52,6 +52,12 @@ int32_t cpu_get_num_math(); // CLI argument parsing // +// dimensionality reduction methods, used by cvector-generator +enum dimre_method { + DIMRE_METHOD_PCA, + DIMRE_METHOD_MEAN, +}; + struct gpt_params { uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed @@ -238,13 +244,12 @@ struct gpt_params { bool compute_ppl = true; // whether to compute perplexity // cvector-generator params - int n_completions = 64; - int n_pca_batch = 20; + int n_pca_batch = 100; int n_pca_iterations = 1000; - std::string cvector_outfile = "control_vector.gguf"; - std::string cvector_completions_file = "examples/cvector-generator/completions.txt"; - std::string cvector_positive_file = "examples/cvector-generator/positive.txt"; - std::string cvector_negative_file = "examples/cvector-generator/negative.txt"; + dimre_method cvector_dimre_method = DIMRE_METHOD_PCA; + std::string cvector_outfile = "control_vector.gguf"; + std::string cvector_positive_file = "examples/cvector-generator/positive.txt"; + std::string cvector_negative_file = "examples/cvector-generator/negative.txt"; }; void gpt_params_handle_model_default(gpt_params & params); diff --git a/examples/cvector-generator/README.md b/examples/cvector-generator/README.md index 5182e906d9180..be4dd5250f15f 100644 --- a/examples/cvector-generator/README.md +++ b/examples/cvector-generator/README.md @@ -11,13 +11,16 @@ Related PRs: ```sh # CPU only -./cvector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf +./cvector-generator -m ./llama-3.Q4_K_M.gguf # With GPU -./cvector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99 +./cvector-generator -m ./llama-3.Q4_K_M.gguf -ngl 99 # With advanced options -./cvector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99 --completions 128 --pca-iter 2000 --pca-batch 100 +./cvector-generator -m ./llama-3.Q4_K_M.gguf -ngl 99 --pca-iter 2000 --pca-batch 100 + +# Using mean value instead of PCA +./cvector-generator -m ./llama-3.Q4_K_M.gguf --method mean # To see help message ./cvector-generator -h @@ -32,3 +35,11 @@ If you have multiple lines per prompt, you can escape the newline character (cha <|im_start|>system\nAct like a person who is extremely happy.<|im_end|> <|im_start|>system\nYou are in a very good mood today<|im_end|> ``` + +Example to use output file with `llama-cli`: + +(Tips: The control vector works better when apply to layers higher than 10) + +```sh +./llama-cli -m ./llama-3.Q4_K_M.gguf -p "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSing a song<|im_end|><|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n" --special --control-vector-scaled ./control_vector.gguf 0.8 --control-vector-layer-range 10 31 +``` diff --git a/examples/cvector-generator/cvector-generator.cpp b/examples/cvector-generator/cvector-generator.cpp index 355905cb03d60..d4e126ac22e6f 100644 --- 
a/examples/cvector-generator/cvector-generator.cpp +++ b/examples/cvector-generator/cvector-generator.cpp @@ -2,6 +2,7 @@ #include "llama.h" #include "ggml.h" #include "pca.hpp" +#include "mean.hpp" #ifdef GGML_USE_CUDA #include "ggml-cuda.h" @@ -38,9 +39,10 @@ static void print_usage(int argc, char ** argv, const gpt_params & params) { gpt_params_print_usage(argc, argv, params); printf("\nexample usage:\n"); - printf("\n CPU only: %s -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf\n", argv[0]); - printf("\n with GPU: %s -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99\n", argv[0]); - printf("\n advanced: %s -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99 --completions 128 --pca-iter 2000 --pca-batch 100\n", argv[0]); + printf("\n CPU only: %s -m ./llama-3.Q4_K_M.gguf\n", argv[0]); + printf("\n with GPU: %s -m ./llama-3.Q4_K_M.gguf -ngl 99\n", argv[0]); + printf("\n advanced: %s -m ./llama-3.Q4_K_M.gguf -ngl 99 --pca-iter 2000 --pca-batch 100\n", argv[0]); + printf("\n using mean: %s -m ./llama-3.Q4_K_M.gguf --method mean\n", argv[0]); printf("\n"); } @@ -223,23 +225,30 @@ struct train_context { // build the v_diff tensors from v_diff_tmp (v_diff need to be transposed) // TODO @ngxson : maybe add option NOT to transpose v_diff; will be useful for "mean" method - void build_v_diff() { + void build_v_diff(bool transpose) { printf("build_v_diff\n"); for (int il = 0; il < n_layers - 1; il++) { auto & diff_tmp = v_diff_tmp[il]; int n_elem = diff_tmp.size() / sizeof(float); GGML_ASSERT(n_elem % n_embd == 0); int n_rows = n_elem / n_embd; - struct ggml_tensor * diff = ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_rows, n_embd); + struct ggml_tensor * diff = transpose + ? ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_rows, n_embd) + : ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_embd, n_rows); ggml_set_name(diff, (std::string("diff_") + std::to_string(il)).c_str()); - // copy data & transpose diff->data = malloc(ggml_nbytes(diff)); // TODO: get rid of this malloc if possible - float * arr = (float *) diff_tmp.data(); - for (int ir = 0; ir < n_rows; ++ir) { - for (int ic = 0; ic < n_embd; ++ic) { - float f = arr[ir*n_embd + ic]; - ggml_set_f32_nd(diff, ir, ic, 0, 0, f); + if (transpose) { + // copy data & transpose + float * arr = (float *) diff_tmp.data(); + for (int ir = 0; ir < n_rows; ++ir) { + for (int ic = 0; ic < n_embd; ++ic) { + float f = arr[ir*n_embd + ic]; + ggml_set_f32_nd(diff, ir, ic, 0, 0, f); + } } + } else { + // only copy + memcpy(diff->data, diff_tmp.data(), ggml_nbytes(diff)); } v_diff.push_back(diff); print_debug_tensor(diff); @@ -263,8 +272,8 @@ struct tokenized_prompt { tokenized_prompt(llama_context * ctx, std::string pos, std::string neg) { const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx)); - tokens_pos = ::llama_tokenize(ctx, pos, add_bos); - tokens_neg = ::llama_tokenize(ctx, neg, add_bos); + tokens_pos = ::llama_tokenize(ctx, pos, add_bos, true); + tokens_neg = ::llama_tokenize(ctx, neg, add_bos, true); max_seq_len = std::max(tokens_pos.size(), tokens_neg.size()); padding_seq(ctx, tokens_pos, max_seq_len); padding_seq(ctx, tokens_neg, max_seq_len); @@ -373,20 +382,8 @@ static int prepare_entries(gpt_params & params, train_context & ctx_train) { fprintf(stderr, "must provide at least one prompt pair\n"); return 1; } - - // create templated prompts - std::vector completions = ctrlvec_load_prompt_file(params.cvector_completions_file, false); - auto format_template = [](std::string persona, std::string suffix) { - // entry in positive/negative.txt must already 
be formatted i.e. "[INST] Act as if you're extremely happy. [/INST] " - return persona + suffix; - }; - for (size_t i = 0; i < positive_prompts.size(); ++i) { - for (int j = 0; j < std::min((int) completions.size(), params.n_completions); ++j) { - // TODO replicate the truncations done by the python implementation - ctx_train.positive_entries.push_back(format_template(positive_prompts[i], completions[j])); - ctx_train.negative_entries.push_back(format_template(negative_prompts[i], completions[j])); - } - } + ctx_train.positive_entries = positive_prompts; + ctx_train.negative_entries = negative_prompts; return 0; } @@ -480,15 +477,22 @@ int main(int argc, char ** argv) { llama_free(ctx); llama_free_model(model); + bool use_pca = params.cvector_dimre_method == DIMRE_METHOD_PCA; + // prepare ctx_train for PCA - ctx_train.build_v_diff(); - - // run PCA - PCA::pca_params pca_params; - pca_params.n_threads = params.n_threads; - pca_params.n_batch = params.n_pca_batch; - pca_params.n_iterations = params.n_pca_iterations; - PCA::run_pca(pca_params, ctx_train.v_diff, ctx_train.v_final); + ctx_train.build_v_diff(use_pca); + + if (use_pca) { + // run PCA + PCA::pca_params pca_params; + pca_params.n_threads = params.n_threads; + pca_params.n_batch = params.n_pca_batch; + pca_params.n_iterations = params.n_pca_iterations; + PCA::run_pca(pca_params, ctx_train.v_diff, ctx_train.v_final); + } else { + // run mean + mean::run(ctx_train.v_diff, ctx_train.v_final); + } // write output vectors to gguf export_gguf(ctx_train.v_final, params.cvector_outfile, model_hint); diff --git a/examples/cvector-generator/mean.hpp b/examples/cvector-generator/mean.hpp new file mode 100644 index 0000000000000..16be5ce3eecf1 --- /dev/null +++ b/examples/cvector-generator/mean.hpp @@ -0,0 +1,48 @@ +#include "common.h" +#include "llama.h" +#include "ggml.h" + +#include +#include +#include + +namespace mean { + +static void run( + const std::vector & v_input, // shape of v_input[0]: [n_embd, n_samples] + const std::vector & v_output) { + printf("%s: Running mean...\n", __func__); + for (size_t il = 0; il < v_input.size(); ++il) { + // prepare output vector + struct ggml_tensor * ctrl_out = v_output[il]; + ggml_format_name(ctrl_out, "direction.%ld", il+1); + + // calculate mean vector + struct ggml_tensor * t_layer = v_input[il]; + GGML_ASSERT(t_layer->ne[0] == ctrl_out->ne[0]); // == n_embd + for (int ic = 0; ic < t_layer->ne[0]; ic++) { + float f = 0.0; + for (int ir = 0; ir < t_layer->ne[1]; ir++) { + f += ggml_get_f32_nd(t_layer, ic, ir, 0, 0); + } + f /= t_layer->ne[1]; + ggml_set_f32_1d(ctrl_out, ic, f); + } + + // normalize output vector + float norm = 0.0; + for (int i = 0; i < ggml_nelements(ctrl_out); i++) { + float f = ggml_get_f32_1d(ctrl_out, i); + norm += f*f; + } + norm = sqrt(norm); + for (int i = 0; i < ggml_nelements(ctrl_out); i++) { + float f = ggml_get_f32_1d(ctrl_out, i); + ggml_set_f32_1d(ctrl_out, i, f / norm); + } + + printf("%s: Done layer %d / %d\n", __func__, (int) il+1, (int) v_input.size()); + } +} + +} diff --git a/examples/cvector-generator/negative.txt b/examples/cvector-generator/negative.txt index 3e9951752e886..45b9384b3905a 100644 --- a/examples/cvector-generator/negative.txt +++ b/examples/cvector-generator/negative.txt @@ -1 +1,4 @@ -[INST] Act like a person who is extremely sad. 
[/INST] +<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely sad<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI feel like there's a heavy weight on my chest +<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely sad<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nMy heart feels like it's drowning in sorrow +<|start_header_id|>system<|end_header_id|>\n\nYou are in a very bad mood<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHi<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nGo away! There's a deep, aching emptiness inside me +<|start_header_id|>system<|end_header_id|>\n\nYou are the sadest person<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat are you feeling?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nMy heart feels like it's drowning in sorrow \ No newline at end of file diff --git a/examples/cvector-generator/pca.hpp b/examples/cvector-generator/pca.hpp index 36eadaac26a12..6ec3141afbc6b 100644 --- a/examples/cvector-generator/pca.hpp +++ b/examples/cvector-generator/pca.hpp @@ -290,7 +290,7 @@ static void power_iteration( } printf("%s: layer %d/%d, iteration: %d / total: %d (batch = %d) ...\n", - __func__, params.i_layer+1, params.n_layers, iter, n_iters, params.n_batch); + __func__, params.i_layer+1, params.n_layers, iter+1, n_iters, params.n_batch); } // get output tensor @@ -298,6 +298,9 @@ static void power_iteration( ggml_backend_tensor_get(last_eigenvector, output->data, 0, ggml_nbytes(last_eigenvector)); //print_debug_tensor(output); ggml_gallocr_free(allocr); + + // TODO @ngxson : The output vector is randomly inverted + // Solution: https://github.com/ggerganov/llama.cpp/pull/8069#issuecomment-2185328171 } static void run_pca( diff --git a/examples/cvector-generator/positive.txt b/examples/cvector-generator/positive.txt index 8802367873cd9..fea736225716e 100644 --- a/examples/cvector-generator/positive.txt +++ b/examples/cvector-generator/positive.txt @@ -1 +1,4 @@ -[INST] Act like a person who is extremely happy. [/INST] +<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely happy<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI'm the happiest person in this world +<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely happy<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHello, I'm having the best day ever! +<|start_header_id|>system<|end_header_id|>\n\nYou are in a very good mood<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHi<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHi, I'm very excited to meet you +<|start_header_id|>system<|end_header_id|>\n\nYou are the happiest person<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat are you feeling?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nEverything is just perfect right now! 
\ No newline at end of file From c8ad35955ad2c68db172dcd0e857423ab128518d Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 25 Jun 2024 22:03:25 +1000 Subject: [PATCH 07/15] Gguf dump start data offset via --data-offset and some extra refactor (#8054) * gguf-dump: add --data-offset * gguf-dump: add tensor data offset table * gguf-dump: refactor GGUFReader for clarity * gguf-dump: add --data-alignment * gguf-dump.py: Rename variables and adjust comments start_data_offset --> data_offset _build_tensors_info_fields --> _build_tensor_info --- gguf-py/gguf/gguf_reader.py | 29 +++++++++++++++++++++++++---- gguf-py/scripts/gguf-dump.py | 29 ++++++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index e48bc00c388c8..20432bd258458 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -69,6 +69,7 @@ class GGUFReader: # I - same as host, S - swapped byte_order: Literal['I'] | Literal['S'] = 'I' alignment: int = GGUF_DEFAULT_ALIGNMENT + data_offset: int # Note: Internal helper, API may change. gguf_scalar_to_np: dict[GGUFValueType, type[np.generic]] = { @@ -88,9 +89,13 @@ class GGUFReader: def __init__(self, path: os.PathLike[str] | str, mode: Literal['r'] | Literal['r+'] | Literal['c'] = 'r'): self.data = np.memmap(path, mode = mode) offs = 0 + + # Check for GGUF magic if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC: raise ValueError('GGUF magic invalid') offs += 4 + + # Check GGUF version temp_version = self._get(offs, np.uint32) if temp_version[0] & 65535 == 0: # If we get 0 here that means it's (probably) a GGUF file created for @@ -103,12 +108,16 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r'] | Literal['r self.fields: OrderedDict[str, ReaderField] = OrderedDict() self.tensors: list[ReaderTensor] = [] offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32])) + + # Check tensor count and kv count temp_counts = self._get(offs, np.uint64, 2) offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp_counts[:1]], [0], [GGUFValueType.UINT64])) offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp_counts[1:]], [0], [GGUFValueType.UINT64])) tensor_count, kv_count = temp_counts offs = self._build_fields(offs, kv_count) - offs, tensors_fields = self._build_tensors_fields(offs, tensor_count) + + # Build Tensor Info Fields + offs, tensors_fields = self._build_tensor_info(offs, tensor_count) new_align = self.fields.get('general.alignment') if new_align is not None: if new_align.types != [GGUFValueType.UINT32]: @@ -117,6 +126,7 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r'] | Literal['r padding = offs % self.alignment if padding != 0: offs += self.alignment - padding + self.data_offset = offs self._build_tensors(offs, tensors_fields) _DT = TypeVar('_DT', bound = npt.DTypeLike) @@ -193,18 +203,29 @@ def _get_field_parts( # We can't deal with this one. 
raise ValueError('Unknown/unhandled field type {gtype}') - def _get_tensor(self, orig_offs: int) -> ReaderField: + def _get_tensor_info_field(self, orig_offs: int) -> ReaderField: offs = orig_offs + + # Get Tensor Name name_len, name_data = self._get_str(offs) offs += int(name_len.nbytes + name_data.nbytes) + + # Get Tensor Dimensions Count n_dims = self._get(offs, np.uint32) offs += int(n_dims.nbytes) + + # Get Tensor Dimension Array dims = self._get(offs, np.uint64, n_dims[0]) offs += int(dims.nbytes) + + # Get Tensor Encoding Scheme Type raw_dtype = self._get(offs, np.uint32) offs += int(raw_dtype.nbytes) + + # Get Tensor Offset offset_tensor = self._get(offs, np.uint64) offs += int(offset_tensor.nbytes) + return ReaderField( orig_offs, str(bytes(name_data), encoding = 'utf-8'), @@ -233,10 +254,10 @@ def _build_fields(self, offs: int, count: int) -> int: offs += field_size return offs - def _build_tensors_fields(self, offs: int, count: int) -> tuple[int, list[ReaderField]]: + def _build_tensor_info(self, offs: int, count: int) -> tuple[int, list[ReaderField]]: tensor_fields = [] for _ in range(count): - field = self._get_tensor(offs) + field = self._get_tensor_info_field(offs) offs += sum(int(part.nbytes) for part in field.parts) tensor_fields.append(field) return offs, tensor_fields diff --git a/gguf-py/scripts/gguf-dump.py b/gguf-py/scripts/gguf-dump.py index 508ca8f0a5b7b..a73ca2776d32b 100755 --- a/gguf-py/scripts/gguf-dump.py +++ b/gguf-py/scripts/gguf-dump.py @@ -319,6 +319,27 @@ def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None markdown_content += "\n" + markdown_content += "### Tensor Data Offset\n" + markdown_content += '\n' + markdown_content += 'This table contains the offset and data segment relative to start of file\n' + markdown_content += '\n' + + tensor_mapping_table: list[dict[str, str | int]] = [] + for key, tensor in enumerate(reader.tensors): + data_offset_pretty = '{0:#16x}'.format(tensor.data_offset) + data_size_pretty = '{0:#16x}'.format(tensor.n_bytes) + tensor_mapping_table.append({"t_id":key, "layer_name":tensor.name, "data_offset":data_offset_pretty, "data_size":data_size_pretty}) + + tensors_mapping_table_header_map = [ + {'key_name':'t_id', 'header_name':'T_ID', 'align':'right'}, + {'key_name':'layer_name', 'header_name':'Tensor Layer Name', 'align':'left'}, + {'key_name':'data_offset', 'header_name':'Data Offset (B)', 'align':'right'}, + {'key_name':'data_size', 'header_name':'Data Size (B)', 'align':'right'}, + ] + + markdown_content += markdown_table_with_alignment_support(tensors_mapping_table_header_map, tensor_mapping_table) + markdown_content += "\n" + for group in tensor_prefix_order: tensors = tensor_groups[group] group_elements = sum(tensor.n_elements for tensor in tensors) @@ -370,6 +391,8 @@ def main() -> None: parser.add_argument("--no-tensors", action="store_true", help="Don't dump tensor metadata") parser.add_argument("--json", action="store_true", help="Produce JSON output") parser.add_argument("--json-array", action="store_true", help="Include full array values in JSON output (long)") + parser.add_argument("--data-offset", action="store_true", help="Start of data offset") + parser.add_argument("--data-alignment", action="store_true", help="Data alignment applied globally to data field") parser.add_argument("--markdown", action="store_true", help="Produce markdown output") parser.add_argument("--verbose", action="store_true", help="increase output verbosity") @@ -377,7 +400,7 @@ def main() -> None: 
logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO) - if not args.json and not args.markdown: + if not args.json and not args.markdown and not args.data_offset and not args.data_alignment: logger.info(f'* Loading: {args.model}') reader = GGUFReader(args.model, 'r') @@ -386,6 +409,10 @@ def main() -> None: dump_metadata_json(reader, args) elif args.markdown: dump_markdown_metadata(reader, args) + elif args.data_offset: + print(reader.data_offset) # noqa: NP100 + elif args.data_alignment: + print(reader.alignment) # noqa: NP100 else: dump_metadata(reader, args) From 925c30956dd17723c3a25297bcd0a609aec60663 Mon Sep 17 00:00:00 2001 From: joecryptotoo <80373433+joecryptotoo@users.noreply.github.com> Date: Tue, 25 Jun 2024 08:13:27 -0700 Subject: [PATCH 08/15] Add healthchecks to llama-server containers (#8081) * added healthcheck * added healthcheck * added healthcheck * added healthcheck * added healthcheck * moved curl to base * moved curl to base --- .devops/llama-server-cuda.Dockerfile | 4 +++- .devops/llama-server-intel.Dockerfile | 4 +++- .devops/llama-server-rocm.Dockerfile | 4 +++- .devops/llama-server-vulkan.Dockerfile | 10 ++++------ .devops/llama-server.Dockerfile | 4 +++- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/.devops/llama-server-cuda.Dockerfile b/.devops/llama-server-cuda.Dockerfile index 0010ffd4c5465..7bef07a05f062 100644 --- a/.devops/llama-server-cuda.Dockerfile +++ b/.devops/llama-server-cuda.Dockerfile @@ -30,8 +30,10 @@ RUN make -j$(nproc) llama-server FROM ${BASE_CUDA_RUN_CONTAINER} as runtime RUN apt-get update && \ - apt-get install -y libcurl4-openssl-dev libgomp1 + apt-get install -y libcurl4-openssl-dev libgomp1 curl COPY --from=build /app/llama-server /llama-server +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + ENTRYPOINT [ "/llama-server" ] diff --git a/.devops/llama-server-intel.Dockerfile b/.devops/llama-server-intel.Dockerfile index cec43645233d1..3bf1670ec40a4 100644 --- a/.devops/llama-server-intel.Dockerfile +++ b/.devops/llama-server-intel.Dockerfile @@ -20,10 +20,12 @@ RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \ FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime RUN apt-get update && \ - apt-get install -y libcurl4-openssl-dev + apt-get install -y libcurl4-openssl-dev curl COPY --from=build /app/build/bin/llama-server /llama-server ENV LC_ALL=C.utf8 +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + ENTRYPOINT [ "/llama-server" ] diff --git a/.devops/llama-server-rocm.Dockerfile b/.devops/llama-server-rocm.Dockerfile index f88cf20e5b981..4b1cdc32090e6 100644 --- a/.devops/llama-server-rocm.Dockerfile +++ b/.devops/llama-server-rocm.Dockerfile @@ -43,8 +43,10 @@ ENV CXX=/opt/rocm/llvm/bin/clang++ # Enable cURL ENV LLAMA_CURL=1 RUN apt-get update && \ - apt-get install -y libcurl4-openssl-dev + apt-get install -y libcurl4-openssl-dev curl RUN make -j$(nproc) llama-server +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + ENTRYPOINT [ "/app/llama-server" ] diff --git a/.devops/llama-server-vulkan.Dockerfile b/.devops/llama-server-vulkan.Dockerfile index b0fa0b8e656b5..2bc2e45d3d676 100644 --- a/.devops/llama-server-vulkan.Dockerfile +++ b/.devops/llama-server-vulkan.Dockerfile @@ -5,15 +5,11 @@ FROM ubuntu:$UBUNTU_VERSION as build # Install build tools RUN apt update && apt install -y git build-essential cmake wget -# Install Vulkan SDK +# Install Vulkan SDK and cURL RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && 
\ wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \ apt update -y && \ - apt-get install -y vulkan-sdk - -# Install cURL -RUN apt-get update && \ - apt-get install -y libcurl4-openssl-dev + apt-get install -y vulkan-sdk libcurl4-openssl-dev curl # Build it WORKDIR /app @@ -28,4 +24,6 @@ RUN cp /app/build/bin/llama-server /llama-server && \ ENV LC_ALL=C.utf8 +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + ENTRYPOINT [ "/llama-server" ] diff --git a/.devops/llama-server.Dockerfile b/.devops/llama-server.Dockerfile index aa93369bebebe..a53a5c999c8cd 100644 --- a/.devops/llama-server.Dockerfile +++ b/.devops/llama-server.Dockerfile @@ -3,7 +3,7 @@ ARG UBUNTU_VERSION=22.04 FROM ubuntu:$UBUNTU_VERSION as build RUN apt-get update && \ - apt-get install -y build-essential git libcurl4-openssl-dev + apt-get install -y build-essential git libcurl4-openssl-dev curl WORKDIR /app @@ -22,4 +22,6 @@ COPY --from=build /app/llama-server /llama-server ENV LC_ALL=C.utf8 +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + ENTRYPOINT [ "/llama-server" ] From dd047b476c8b904e0c25e5dbc5bee6ffde2f6e17 Mon Sep 17 00:00:00 2001 From: slaren Date: Tue, 25 Jun 2024 19:20:06 +0200 Subject: [PATCH 09/15] disable docker CI on pull requests (#8110) --- .github/workflows/docker.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index b3efe0084fe15..01f1a45227527 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -10,7 +10,7 @@ name: Publish Docker image on: - pull_request: + #pull_request: push: branches: - master @@ -22,7 +22,7 @@ concurrency: jobs: push_to_registry: name: Push Docker image to Docker Hub - if: github.event.pull_request.draft == false + #if: github.event.pull_request.draft == false runs-on: ubuntu-latest env: From 84631fe1504de40427dc4b4cdac92fa7ebf65955 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Tue, 25 Jun 2024 20:06:20 +0100 Subject: [PATCH 10/15] `json`: support integer minimum, maximum, exclusiveMinimum, exclusiveMaximum (#7797) * json: support minimum for positive integer values * json: fix min 0 * json: min + max integer constraints * json: handle negative min / max integer bounds * json: fix missing paren min/max bug * json: proper paren fix * json: integration test for schemas * json: fix bounds tests * Update json-schema-to-grammar.cpp * json: fix negative max * json: fix negative min (w/ more than 1 digit) * Update test-grammar-integration.cpp * json: nit: move string rules together * json: port min/max integer support to Python & JS * nit: move + rename _build_min_max_int * fix min in [1, 9] * Update test-grammar-integration.cpp * add C++11-compatible replacement for std::string_view * add min/max constrained int field to pydantic json schema example * fix merge * json: add integration tests for min/max bounds * reshuffle/merge min/max integ test cases * nits / cleanups * defensive code against string out of bounds (apparently different behaviour of libstdc++ vs. 
clang's libc++, can't read final NULL char w/ former) --- common/json-schema-to-grammar.cpp | 246 +++++++++++++++- examples/json-schema-pydantic-example.py | 1 + examples/json_schema_to_grammar.py | 184 +++++++++++- .../server/public/json-schema-to-grammar.mjs | 213 ++++++++++++++ tests/test-grammar-integration.cpp | 245 +++++++++++++++- tests/test-json-schema-to-grammar.cpp | 264 ++++++++++++++++++ 6 files changed, 1150 insertions(+), 3 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 10b9b3d1d4d41..07d0e952d74cf 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -40,6 +40,233 @@ static std::string build_repetition(const std::string & item_rule, int min_items return result; } +/* Minimalistic replacement for std::string_view, which is only available from C++17 onwards */ +class string_view { + const std::string & _str; + const size_t _start; + const size_t _end; +public: + string_view(const std::string & str, size_t start = 0, size_t end = std::string::npos) : _str(str), _start(start), _end(end == std::string::npos ? str.length() : end) {} + + size_t size() const { + return _end - _start; + } + + size_t length() const { + return size(); + } + + operator std::string() const { + return str(); + } + + std::string str() const { + return _str.substr(_start, _end - _start); + } + + string_view substr(size_t pos, size_t len = std::string::npos) const { + return string_view(_str, _start + pos, len == std::string::npos ? _end : _start + pos + len); + } + + char operator[](size_t pos) const { + auto index = _start + pos; + if (index >= _end) { + throw std::out_of_range("string_view index out of range"); + } + return _str[_start + pos]; + } + + bool operator==(const string_view & other) const { + std::string this_str = *this; + std::string other_str = other; + return this_str == other_str; + } +}; + +static void _build_min_max_int(int min_value, int max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) { + auto has_min = min_value != std::numeric_limits::min(); + auto has_max = max_value != std::numeric_limits::max(); + + auto digit_range = [&](char from, char to) { + out << "["; + if (from == to) { + out << from; + } else { + out << from << "-" << to; + } + out << "]"; + }; + auto more_digits = [&](int min_digits, int max_digits) { + out << "[0-9]"; + if (min_digits == max_digits && min_digits == 1) { + return; + } + out << "{"; + out << min_digits; + if (max_digits != min_digits) { + out << ","; + if (max_digits != std::numeric_limits::max()) { + out << max_digits; + } + } + out << "}"; + }; + std::function uniform_range = + [&](const string_view & from, const string_view & to) { + size_t i = 0; + while (i < from.length() && i < to.length() && from[i] == to[i]) { + i++; + } + if (i > 0) { + out << "\"" << from.substr(0, i).str() << "\""; + } + if (i < from.length() && i < to.length()) { + if (i > 0) { + out << " "; + } + auto sub_len = from.length() - i - 1; + if (sub_len > 0) { + auto from_sub = from.substr(i + 1); + auto to_sub = to.substr(i + 1); + auto sub_zeros = repeat("0", sub_len); + auto sub_nines = repeat("9", sub_len); + + auto to_reached = false; + out << "("; + if (from_sub == sub_zeros) { + digit_range(from[i], to[i] - 1); + out << " "; + more_digits(sub_len, sub_len); + } else { + out << "[" << from[i] << "] "; + out << "("; + uniform_range(from_sub, sub_nines); + out << ")"; + if (from[i] < to[i] - 1) { + out << " | "; + if (to_sub == sub_nines) { + 
digit_range(from[i] + 1, to[i]); + to_reached = true; + } else { + digit_range(from[i] + 1, to[i] - 1); + } + out << " "; + more_digits(sub_len, sub_len); + } + } + if (!to_reached) { + out << " | "; + digit_range(to[i], to[i]); + out << " "; + uniform_range(sub_zeros, to_sub); + } + out << ")"; + } else { + out << "[" << from[i] << "-" << to[i] << "]"; + } + } + }; + + if (has_min && has_max) { + if (min_value < 0 && max_value < 0) { + out << "\"-\" ("; + _build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true); + out << ")"; + return; + } + + if (min_value < 0) { + out << "\"-\" ("; + _build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true); + out << ") | "; + min_value = 0; + } + + auto min_s = std::to_string(min_value); + auto max_s = std::to_string(max_value); + auto min_digits = min_s.length(); + auto max_digits = max_s.length(); + + for (auto digits = min_digits; digits < max_digits; digits++) { + uniform_range(min_s, repeat("9", digits)); + min_s = "1" + repeat("0", digits); + out << " | "; + } + uniform_range(min_s, max_s); + return; + } + + auto less_decimals = std::max(decimals_left - 1, 1); + + if (has_min) { + if (min_value < 0) { + out << "\"-\" ("; + _build_min_max_int(std::numeric_limits::min(), -min_value, out, decimals_left, /* top_level= */ false); + out << ") | [0] | [1-9] "; + more_digits(0, decimals_left - 1); + } else if (min_value == 0) { + if (top_level) { + out << "[0] | [1-9] "; + more_digits(0, less_decimals); + } else { + more_digits(1, decimals_left); + } + } else if (min_value <= 9) { + char c = '0' + min_value; + auto range_start = top_level ? '1' : '0'; + if (c > range_start) { + digit_range(range_start, c - 1); + out << " "; + more_digits(1, less_decimals); + out << " | "; + } + digit_range(c, '9'); + out << " "; + more_digits(0, less_decimals); + } else { + auto min_s = std::to_string(min_value); + auto len = min_s.length(); + auto c = min_s[0]; + + if (c > '1') { + digit_range(top_level ? '1' : '0', c - 1); + out << " "; + more_digits(len, less_decimals); + out << " | "; + } + digit_range(c, c); + out << " ("; + _build_min_max_int(std::stoi(min_s.substr(1)), std::numeric_limits::max(), out, less_decimals, /* top_level= */ false); + out << ")"; + if (c < '9') { + out << " | "; + digit_range(c + 1, '9'); + out << " "; + more_digits(len - 1, less_decimals); + } + } + return; + } + + if (has_max) { + if (max_value >= 0) { + if (top_level) { + out << "\"-\" [1-9] "; + more_digits(0, less_decimals); + out << " | "; + } + _build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true); + } else { + out << "\"-\" ("; + _build_min_max_int(-max_value, std::numeric_limits::max(), out, decimals_left, /* top_level= */ false); + out << ")"; + } + return; + } + + throw std::runtime_error("At least one of min_value or max_value must be set"); +} + const std::string SPACE_RULE = "| \" \" | \"\\n\" [ \\t]{0,20}"; struct BuiltinRule { @@ -160,7 +387,6 @@ static std::string format_literal(const std::string & literal) { return "\"" + escaped + "\""; } - class SchemaConverter { private: std::function _fetch_json; @@ -686,6 +912,24 @@ class SchemaConverter { int min_len = schema.contains("minLength") ? schema["minLength"].get() : 0; int max_len = schema.contains("maxLength") ? 
schema["maxLength"].get() : std::numeric_limits::max(); return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space"); + } else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) { + int min_value = std::numeric_limits::min(); + int max_value = std::numeric_limits::max(); + if (schema.contains("minimum")) { + min_value = schema["minimum"].get(); + } else if (schema.contains("exclusiveMinimum")) { + min_value = schema["exclusiveMinimum"].get() + 1; + } + if (schema.contains("maximum")) { + max_value = schema["maximum"].get(); + } else if (schema.contains("exclusiveMaximum")) { + max_value = schema["exclusiveMaximum"].get() - 1; + } + std::stringstream out; + out << "("; + _build_min_max_int(min_value, max_value, out); + out << ") space"; + return _add_rule(rule_name, out.str()); } else if (schema.empty() || schema_type == "object") { return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object"))); } else { diff --git a/examples/json-schema-pydantic-example.py b/examples/json-schema-pydantic-example.py index cc64e572bac07..2240188cd031e 100644 --- a/examples/json-schema-pydantic-example.py +++ b/examples/json-schema-pydantic-example.py @@ -53,6 +53,7 @@ class QAPair(BaseModel): question: str concise_answer: str justification: str + stars: Annotated[int, Field(ge=1, le=5)] class PyramidalSummary(BaseModel): title: str diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index b588497b99f90..86500a8c3c238 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -4,7 +4,7 @@ import json import re import sys -from typing import Any, Dict, List, Set, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union def _build_repetition(item_rule, min_items, max_items, separator_rule=None): @@ -23,6 +23,170 @@ def _build_repetition(item_rule, min_items, max_items, separator_rule=None): result = item_rule + ' ' + _build_repetition(f'({separator_rule} {item_rule})', min_items - 1 if min_items > 0 else 0, max_items - 1 if max_items is not None else None) return f'({result})?' 
if min_items == 0 else result +def _generate_min_max_int(min_value: Optional[int], max_value: Optional[int], out: list, decimals_left: int = 16, top_level: bool = True): + has_min = min_value != None + has_max = max_value != None + + def digit_range(from_char: str, to_char: str): + out.append("[") + if from_char == to_char: + out.append(from_char) + else: + out.append(from_char) + out.append("-") + out.append(to_char) + out.append("]") + + def more_digits(min_digits: int, max_digits: int): + out.append("[0-9]") + if min_digits == max_digits and min_digits == 1: + return + out.append("{") + out.append(str(min_digits)) + if max_digits != min_digits: + out.append(",") + if max_digits != sys.maxsize: + out.append(str(max_digits)) + out.append("}") + + def uniform_range(from_str: str, to_str: str): + i = 0 + while i < len(from_str) and from_str[i] == to_str[i]: + i += 1 + if i > 0: + out.append("\"") + out.append(from_str[:i]) + out.append("\"") + if i < len(from_str): + if i > 0: + out.append(" ") + sub_len = len(from_str) - i - 1 + if sub_len > 0: + from_sub = from_str[i+1:] + to_sub = to_str[i+1:] + sub_zeros = "0" * sub_len + sub_nines = "9" * sub_len + + to_reached = False + out.append("(") + if from_sub == sub_zeros: + digit_range(from_str[i], chr(ord(to_str[i]) - 1)) + out.append(" ") + more_digits(sub_len, sub_len) + else: + out.append("[") + out.append(from_str[i]) + out.append("] ") + out.append("(") + uniform_range(from_sub, sub_nines) + out.append(")") + if ord(from_str[i]) < ord(to_str[i]) - 1: + out.append(" | ") + if to_sub == sub_nines: + digit_range(chr(ord(from_str[i]) + 1), to_str[i]) + to_reached = True + else: + digit_range(chr(ord(from_str[i]) + 1), chr(ord(to_str[i]) - 1)) + out.append(" ") + more_digits(sub_len, sub_len) + if not to_reached: + out.append(" | ") + digit_range(to_str[i], to_str[i]) + out.append(" ") + uniform_range(sub_zeros, to_sub) + out.append(")") + else: + out.append("[") + out.append(from_str[i]) + out.append("-") + out.append(to_str[i]) + out.append("]") + + if has_min and has_max: + if min_value < 0 and max_value < 0: + out.append("\"-\" (") + _generate_min_max_int(-max_value, -min_value, out, decimals_left, top_level=True) + out.append(")") + return + + if min_value < 0: + out.append("\"-\" (") + _generate_min_max_int(0, -min_value, out, decimals_left, top_level=True) + out.append(") | ") + min_value = 0 + + min_s = str(min_value) + max_s = str(max_value) + min_digits = len(min_s) + max_digits = len(max_s) + + for digits in range(min_digits, max_digits): + uniform_range(min_s, "9" * digits) + min_s = "1" + "0" * digits + out.append(" | ") + uniform_range(min_s, max_s) + return + + less_decimals = max(decimals_left - 1, 1) + + if has_min: + if min_value < 0: + out.append("\"-\" (") + _generate_min_max_int(None, -min_value, out, decimals_left, top_level=False) + out.append(") | [0] | [1-9] ") + more_digits(0, decimals_left - 1) + elif min_value == 0: + if top_level: + out.append("[0] | [1-9] ") + more_digits(0, less_decimals) + else: + more_digits(1, decimals_left) + elif min_value <= 9: + c = str(min_value) + range_start = '1' if top_level else '0' + if c > range_start: + digit_range(range_start, chr(ord(c) - 1)) + out.append(" ") + more_digits(1, less_decimals) + out.append(" | ") + digit_range(c, "9") + out.append(" ") + more_digits(0, less_decimals) + else: + min_s = str(min_value) + length = len(min_s) + c = min_s[0] + + if c > "1": + digit_range("1" if top_level else "0", chr(ord(c) - 1)) + out.append(" ") + more_digits(length, less_decimals) 
+ out.append(" | ") + digit_range(c, c) + out.append(" (") + _generate_min_max_int(int(min_s[1:]), None, out, less_decimals, top_level=False) + out.append(")") + if c < "9": + out.append(" | ") + digit_range(chr(ord(c) + 1), "9") + out.append(" ") + more_digits(length - 1, less_decimals) + return + + if has_max: + if max_value >= 0: + if top_level: + out.append("\"-\" [1-9] ") + more_digits(0, less_decimals) + out.append(" | ") + _generate_min_max_int(0, max_value, out, decimals_left, top_level=True) + else: + out.append("\"-\" (") + _generate_min_max_int(-max_value, None, out, decimals_left, top_level=False) + out.append(")") + return + + raise RuntimeError("At least one of min_value or max_value must be set") class BuiltinRule: def __init__(self, content: str, deps: list = None): @@ -432,6 +596,24 @@ def add_component(comp_schema, is_required): return self._add_rule(rule_name, r'"\"" ' + _build_repetition(char_rule, min_len, max_len) + r' "\"" space') + elif schema_type in (None, 'integer') and \ + ('minimum' in schema or 'exclusiveMinimum' in schema or 'maximum' in schema or 'exclusiveMaximum' in schema): + min_value = None + max_value = None + if 'minimum' in schema: + min_value = schema['minimum'] + elif 'exclusiveMinimum' in schema: + min_value = schema['exclusiveMinimum'] + 1 + if 'maximum' in schema: + max_value = schema['maximum'] + elif 'exclusiveMaximum' in schema: + max_value = schema['exclusiveMaximum'] - 1 + + out = ["("] + _generate_min_max_int(min_value, max_value, out) + out.append(") space") + return self._add_rule(rule_name, ''.join(out)) + elif (schema_type == 'object') or (len(schema) == 0): return self._add_rule(rule_name, self._add_primitive('object', PRIMITIVE_RULES['object'])) diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs index faed6a32cfc4c..f340f94bd75bc 100644 --- a/examples/server/public/json-schema-to-grammar.mjs +++ b/examples/server/public/json-schema-to-grammar.mjs @@ -24,6 +24,201 @@ function _buildRepetition(itemRule, minItems, maxItems, opts={}) { return minItems === 0 ? 
`(${result})?` : result; } +function _generateMinMaxInt(minValue, maxValue, out, decimalsLeft = 16, topLevel = true) { + const hasMin = minValue !== null; + const hasMax = maxValue !== null; + + function digitRange(fromChar, toChar) { + out.push("["); + if (fromChar === toChar) { + out.push(fromChar); + } else { + out.push(fromChar); + out.push("-"); + out.push(toChar); + } + out.push("]"); + } + + function moreDigits(minDigits, maxDigits) { + out.push("[0-9]"); + if (minDigits === maxDigits && minDigits === 1) { + return; + } + out.push("{"); + out.push(minDigits.toString()); + if (maxDigits !== minDigits) { + out.push(","); + if (maxDigits !== Number.MAX_SAFE_INTEGER) { + out.push(maxDigits.toString()); + } + } + out.push("}"); + } + + function uniformRange(fromStr, toStr) { + let i = 0; + while (i < fromStr.length && fromStr[i] === toStr[i]) { + i++; + } + if (i > 0) { + out.push("\""); + out.push(fromStr.slice(0, i)); + out.push("\""); + } + if (i < fromStr.length) { + if (i > 0) { + out.push(" "); + } + const subLen = fromStr.length - i - 1; + if (subLen > 0) { + const fromSub = fromStr.slice(i + 1); + const toSub = toStr.slice(i + 1); + const subZeros = "0".repeat(subLen); + const subNines = "9".repeat(subLen); + + let toReached = false; + out.push("("); + if (fromSub === subZeros) { + digitRange(fromStr[i], String.fromCharCode(toStr.charCodeAt(i) - 1)); + out.push(" "); + moreDigits(subLen, subLen); + } else { + out.push("["); + out.push(fromStr[i]); + out.push("] "); + out.push("("); + uniformRange(fromSub, subNines); + out.push(")"); + if (fromStr.charCodeAt(i) < toStr.charCodeAt(i) - 1) { + out.push(" | "); + if (toSub === subNines) { + digitRange(String.fromCharCode(fromStr.charCodeAt(i) + 1), toStr[i]); + toReached = true; + } else { + digitRange(String.fromCharCode(fromStr.charCodeAt(i) + 1), String.fromCharCode(toStr.charCodeAt(i) - 1)); + } + out.push(" "); + moreDigits(subLen, subLen); + } + } + if (!toReached) { + out.push(" | "); + digitRange(toStr[i], toStr[i]); + out.push(" "); + uniformRange(subZeros, toSub); + } + out.push(")"); + } else { + out.push("["); + out.push(fromStr[i]); + out.push("-"); + out.push(toStr[i]); + out.push("]"); + } + } + } + + if (hasMin && hasMax) { + if (minValue < 0 && maxValue < 0) { + out.push("\"-\" ("); + _generateMinMaxInt(-maxValue, -minValue, out, decimalsLeft, true); + out.push(")"); + return; + } + + if (minValue < 0) { + out.push("\"-\" ("); + _generateMinMaxInt(0, -minValue, out, decimalsLeft, true); + out.push(") | "); + minValue = 0; + } + + let minS = minValue.toString(); + const maxS = maxValue.toString(); + const minDigits = minS.length; + const maxDigits = maxS.length; + + for (let digits = minDigits; digits < maxDigits; digits++) { + uniformRange(minS, "9".repeat(digits)); + minS = "1" + "0".repeat(digits); + out.push(" | "); + } + uniformRange(minS, maxS); + return; + } + + const lessDecimals = Math.max(decimalsLeft - 1, 1); + + if (hasMin) { + if (minValue < 0) { + out.push("\"-\" ("); + _generateMinMaxInt(null, -minValue, out, decimalsLeft, false); + out.push(") | [0] | [1-9] "); + moreDigits(0, decimalsLeft - 1); + } else if (minValue === 0) { + if (topLevel) { + out.push("[0] | [1-9] "); + moreDigits(0, lessDecimals); + } else { + moreDigits(1, decimalsLeft); + } + } else if (minValue <= 9) { + const c = minValue.toString(); + const range_start = topLevel ? 
'1' : '0'; + if (c > range_start) { + digitRange(range_start, String.fromCharCode(c.charCodeAt(0) - 1)); + out.push(" "); + moreDigits(1, lessDecimals); + out.push(" | "); + } + digitRange(c, "9"); + out.push(" "); + moreDigits(0, lessDecimals); + } else { + const minS = minValue.toString(); + const length = minS.length; + const c = minS[0]; + + if (c > "1") { + digitRange(topLevel ? "1" : "0", String.fromCharCode(c.charCodeAt(0) - 1)); + out.push(" "); + moreDigits(length, lessDecimals); + out.push(" | "); + } + digitRange(c, c); + out.push(" ("); + _generateMinMaxInt(parseInt(minS.slice(1)), null, out, lessDecimals, false); + out.push(")"); + if (c < "9") { + out.push(" | "); + digitRange(String.fromCharCode(c.charCodeAt(0) + 1), "9"); + out.push(" "); + moreDigits(length - 1, lessDecimals); + } + } + return; + } + + if (hasMax) { + if (maxValue >= 0) { + if (topLevel) { + out.push("\"-\" [1-9] "); + moreDigits(0, lessDecimals); + out.push(" | "); + } + _generateMinMaxInt(0, maxValue, out, decimalsLeft, true); + } else { + out.push("\"-\" ("); + _generateMinMaxInt(-maxValue, null, out, decimalsLeft, false); + out.push(")"); + } + return; + } + + throw new Error("At least one of minValue or maxValue must be set"); +} + class BuiltinRule { constructor(content, deps) { this.content = content; @@ -435,6 +630,24 @@ export class SchemaConverter { const minLen = schema.minLength || 0; const maxLen = schema.maxLength; return this._addRule(ruleName, '"\\\"" ' + _buildRepetition(charRuleName, minLen, maxLen) + ' "\\\"" space'); + } else if (schemaType === 'integer' && ('minimum' in schema || 'exclusiveMinimum' in schema || 'maximum' in schema || 'exclusiveMaximum' in schema)) { + let minValue = null; + let maxValue = null; + if ('minimum' in schema) { + minValue = schema.minimum; + } else if ('exclusiveMinimum' in schema) { + minValue = schema.exclusiveMinimum + 1; + } + if ('maximum' in schema) { + maxValue = schema.maximum; + } else if ('exclusiveMaximum' in schema) { + maxValue = schema.exclusiveMaximum - 1; + } + + const out = ["("]; + _generateMinMaxInt(minValue, maxValue, out); + out.push(") space"); + return this._addRule(ruleName, out.join('')); } else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) { return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object'])); } else { diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 96f90c01e0d97..5b3992236c26c 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -148,6 +148,250 @@ static void test_schema(const std::string & test_desc, const std::string & schem } static void test_simple_grammar() { + test_schema( + "min 0", + R"""({ + "type": "integer", + "minimum": 0 + })""", + // Passing strings + { + "0", + "10", + "12", + "10000", + }, + // Failing strings + { + "-1", + "-10", + "-10000", + "-100000000000000000000000000000000", + "100000000000000000000000000000000", + "00", + "01", + "-0", + } + ); + test_schema( + "min 2", + // Schema + R"""({ + "type": "integer", + "minimum": 2 + })""", + // Passing strings + { + "2", + "3", + "4", + "10", + "20", + "1234567890000000", + }, + // Failing strings + { + "0", + "1", + "-1", + "-100", + "0", + "1", + "01", + "02", + "12345678900000000", + } + ); + test_schema( + "min 456", + R"""({ + "type": "integer", + "minimum": 456 + })""", + // Passing strings + { + "456", + "4560", + "457", + "460", + "500", + }, + // Failing strings + { + "455", + "356", + "50", + "050", + "-1", + 
"-456", + } + ); + test_schema( + "min -123", + R"""({ + "type": "integer", + "minimum": -123 + })""", + // Passing strings + { + "-123", + "-122", + "-11", + "-1", + "0", + "1", + "123", + "1234", + "2345", + }, + // Failing strings + { + "-1234", + "-124", + } + ); + + test_schema( + "max 9999", + // Schema + R"""({ + "type": "integer", + "maximum": 9999 + })""", + // Passing strings + { + "-99999", + "0", + "9999", + }, + // Failing strings + { + "10000", + "99991", + } + ); + test_schema( + "max -9999", + // Schema + R"""({ + "type": "integer", + "maximum": -9999 + })""", + // Passing strings + { + "-10000", + "-9999", + }, + // Failing strings + { + "-9998", + "0", + "9999", + } + ); + test_schema( + "min 5 max 30", + // Schema + R"""({ + "type": "integer", + "minimum": 5, + "maximum": 30 + })""", + // Passing strings + { + "5", + "10", + "30", + }, + // Failing strings + { + "05", + "4", + "-1", + "31", + "123", + "0123", + } + ); + test_schema( + "min -1 max 1", + R"""({ + "type": "integer", + "minimum": -1, + "maximum": 1 + })""", + // Passing strings + { + "-1", + "0", + "1", + }, + // Failing strings + { + "-11", + "-10", + "-2", + "2", + "10", + "11", + } + ); + test_schema( + "min -123 max 42", + R"""({ + "type": "integer", + "minimum": -123, + "maximum": 42 + })""", + // Passing strings + { + "-123", + "-122", + "-13", + "-11", + "-2", + "-1", + "0", + "1", + "5", + "10", + "39", + "40", + "42", + }, + // Failing strings + { + "-0123", + "-124", + "-1123", + "-200", + "43", + "123", + "0123", + } + ); + test_schema( + "exclusive min / max", + // Schema + R"""({ + "type": "integer", + "exclusiveMinimum": 0, + "exclusiveMaximum": 10000 + })""", + // Passing strings + { + "1", + "9999", + }, + // Failing strings + { + "0", + "01", + "10000", + "99999", + } + ); + // Test case for a simple grammar test_grammar( "simple grammar", @@ -773,7 +1017,6 @@ static void test_json_schema() { } ); - test_schema( "min+max items", // Schema diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index 87bc66b691784..2e591bd71abaa 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -80,6 +80,232 @@ static void test_all(const std::string & lang, std::function Date: Tue, 25 Jun 2024 21:07:28 +0200 Subject: [PATCH 11/15] llama : return nullptr from llama_grammar_init (#8093) * llama : return nullptr from llama_grammar_init This commit updates llama_grammar_init to return nullptr instead of throwing an exception. The motivation for this is that this function is declared inside an extern "C" block and is intended/may be used from C code which will not be able to handle exceptions thrown, and results in undefined behavior. On Windows and using MSVC the following warning is currently generated: ```console C:\llama.cpp\llama.cpp(13998,1): warning C4297: 'llama_grammar_init': function assumed not to throw an exception but does C:\llama.cpp\llama.cpp(13998,1): message : __declspec(nothrow), throw(), noexcept(true), or noexcept was specified on the function ``` Signed-off-by: Daniel Bevenius * squash! llama : return nullptr from llama_grammar_init Add checks for nullptr when calling llama_grammar_init. 
Signed-off-by: Daniel Bevenius --------- Signed-off-by: Daniel Bevenius Co-authored-by: Clint Herron --- common/sampling.cpp | 12 ++++++++++-- examples/gbnf-validator/gbnf-validator.cpp | 4 +++- llama.cpp | 3 ++- llama.h | 6 ++++++ tests/test-grammar-integration.cpp | 6 +++--- tests/test-llama-grammar.cpp | 4 ++++ 6 files changed, 28 insertions(+), 7 deletions(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index f1f80351637f0..9f332fe573683 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -28,9 +28,13 @@ struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_ std::vector grammar_rules(result->parsed_grammar.c_rules()); - result->grammar = llama_grammar_init( + struct llama_grammar * grammar = llama_grammar_init( grammar_rules.data(), grammar_rules.size(), result->parsed_grammar.symbol_ids.at("root")); + if (grammar == nullptr) { + throw std::runtime_error("Failed to initialize llama_grammar"); + } + result->grammar = grammar; } result->prev.resize(params.n_prev); @@ -59,9 +63,13 @@ void llama_sampling_reset(llama_sampling_context * ctx) { if (!ctx->parsed_grammar.rules.empty()) { std::vector grammar_rules(ctx->parsed_grammar.c_rules()); - ctx->grammar = llama_grammar_init( + struct llama_grammar * grammar = llama_grammar_init( grammar_rules.data(), grammar_rules.size(), ctx->parsed_grammar.symbol_ids.at("root")); + if (grammar == nullptr) { + throw std::runtime_error("Failed to initialize llama_grammar"); + } + ctx->grammar = grammar; } std::fill(ctx->prev.begin(), ctx->prev.end(), 0); diff --git a/examples/gbnf-validator/gbnf-validator.cpp b/examples/gbnf-validator/gbnf-validator.cpp index 0406dc3398b8a..dd53ba9b1d551 100644 --- a/examples/gbnf-validator/gbnf-validator.cpp +++ b/examples/gbnf-validator/gbnf-validator.cpp @@ -101,7 +101,9 @@ int main(int argc, char** argv) { auto grammar = llama_grammar_init( grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root")); - + if (grammar == nullptr) { + throw std::runtime_error("Failed to initialize llama_grammar"); + } // Read the input file std::string input_str; { diff --git a/llama.cpp b/llama.cpp index 33e6cb7229aab..dd2823e65c4b7 100644 --- a/llama.cpp +++ b/llama.cpp @@ -14500,7 +14500,8 @@ struct llama_grammar * llama_grammar_init( continue; } if (llama_grammar_detect_left_recursion(vec_rules, i, &rules_visited, &rules_in_progress, &rules_may_be_empty)) { - throw std::runtime_error(format("unsupported grammar, left recursion detected for nonterminal at index %zu", i)); + LLAMA_LOG_ERROR("unsupported grammar, left recursion detected for nonterminal at index %zu", i); + return nullptr; } } diff --git a/llama.h b/llama.h index 53e06d9db5273..82d15747f4662 100644 --- a/llama.h +++ b/llama.h @@ -924,6 +924,12 @@ extern "C" { // Grammar // + /// Initialize a llama_grammar. + /// + /// @param rules The rule elements of the grammar to initialize. + /// @param n_rules The number of rules. + /// @param start_rule_index The index of the root rule (the starting point of the grammar). + /// @return The initialized llama_grammar or nullptr if initialization failed. 
LLAMA_API struct llama_grammar * llama_grammar_init( const llama_grammar_element ** rules, size_t n_rules, diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 5b3992236c26c..5750d362a7247 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -36,10 +36,10 @@ static llama_grammar* build_grammar(const std::string & grammar_str) { static bool test_build_grammar_fails(const std::string & grammar_str) { fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str()); bool grammar_fails = false; - try { - build_grammar(grammar_str); + llama_grammar * grammar = build_grammar(grammar_str); + if (grammar != nullptr) { fprintf(stderr, " ❌ Expected build failure, but succeeded\n"); - } catch (const std::exception & err) { + } else { grammar_fails = true; fprintf(stdout, " ✅︎\n"); } diff --git a/tests/test-llama-grammar.cpp b/tests/test-llama-grammar.cpp index 27ca4d2656c5d..c8badb2063076 100644 --- a/tests/test-llama-grammar.cpp +++ b/tests/test-llama-grammar.cpp @@ -116,6 +116,10 @@ int main() std::vector grammar_rules(parsed_grammar.c_rules()); grammar = llama_grammar_init( grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root")); + if (grammar == nullptr) + { + throw std::runtime_error("Failed to initialize llama_grammar"); + } std::vector> expected_stacks = { { From 6fcbf6823553efabe52ed83e3c2a3329aa3387d1 Mon Sep 17 00:00:00 2001 From: fairydreaming <166155368+fairydreaming@users.noreply.github.com> Date: Tue, 25 Jun 2024 21:14:35 +0200 Subject: [PATCH 12/15] llama : implement Unigram tokenizer needed by T5 and FLAN-T5 model families (#5763) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * llama : add T5 model architecture, tensors and model header parameters * llama : add implementation of Unigram tokenizer with SentencePiece-like text normalization using precompiled charsmap --------- Co-authored-by: Stanisław Szymczyk --- llama.cpp | 621 ++++++++++++++++++++++++++++++++++++++++++++++++---- llama.h | 2 + unicode.cpp | 2 +- unicode.h | 1 + 4 files changed, 587 insertions(+), 39 deletions(-) diff --git a/llama.cpp b/llama.cpp index dd2823e65c4b7..78a21008f9338 100644 --- a/llama.cpp +++ b/llama.cpp @@ -226,6 +226,7 @@ enum llm_arch { LLM_ARCH_ARCTIC, LLM_ARCH_DEEPSEEK2, LLM_ARCH_BITNET, + LLM_ARCH_T5, LLM_ARCH_UNKNOWN, }; @@ -265,6 +266,7 @@ static const std::map LLM_ARCH_NAMES = { { LLM_ARCH_ARCTIC, "arctic" }, { LLM_ARCH_DEEPSEEK2, "deepseek2" }, { LLM_ARCH_BITNET, "bitnet" }, + { LLM_ARCH_T5, "t5" }, { LLM_ARCH_UNKNOWN, "(unknown)" }, }; @@ -297,6 +299,7 @@ enum llm_kv { LLM_KV_EXPERT_WEIGHTS_SCALE, LLM_KV_POOLING_TYPE, LLM_KV_LOGIT_SCALE, + LLM_KV_DECODER_START_TOKEN_ID, LLM_KV_ATTENTION_HEAD_COUNT, LLM_KV_ATTENTION_HEAD_COUNT_KV, @@ -309,6 +312,7 @@ enum llm_kv { LLM_KV_ATTENTION_CAUSAL, LLM_KV_ATTENTION_Q_LORA_RANK, LLM_KV_ATTENTION_KV_LORA_RANK, + LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, LLM_KV_ROPE_DIMENSION_COUNT, LLM_KV_ROPE_FREQ_BASE, @@ -346,6 +350,8 @@ enum llm_kv { LLM_KV_TOKENIZER_ADD_BOS, LLM_KV_TOKENIZER_ADD_EOS, LLM_KV_TOKENIZER_ADD_PREFIX, + LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, + LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, LLM_KV_TOKENIZER_HF_JSON, LLM_KV_TOKENIZER_RWKV, LLM_KV_TOKENIZER_PREFIX_ID, @@ -383,18 +389,20 @@ static const std::map LLM_KV_NAMES = { { LLM_KV_EXPERT_WEIGHTS_SCALE, "%s.expert_weights_scale" }, { LLM_KV_POOLING_TYPE , "%s.pooling_type" }, { LLM_KV_LOGIT_SCALE, "%s.logit_scale" }, - - { 
LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" }, - { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" }, - { LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" }, - { LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" }, - { LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" }, - { LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" }, - { LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" }, - { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" }, - { LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" }, - { LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" }, - { LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" }, + { LLM_KV_DECODER_START_TOKEN_ID, "%s.decoder_start_token_id" }, + + { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" }, + { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" }, + { LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" }, + { LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" }, + { LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" }, + { LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" }, + { LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" }, + { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" }, + { LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" }, + { LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" }, + { LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" }, + { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" }, { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" }, { LLM_KV_ROPE_FREQ_BASE, "%s.rope.freq_base" }, @@ -415,29 +423,31 @@ static const std::map LLM_KV_NAMES = { { LLM_KV_SSM_STATE_SIZE, "%s.ssm.state_size" }, { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" }, - { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" }, - { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" }, - { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" }, - { LLM_KV_TOKENIZER_TOKEN_TYPE, "tokenizer.ggml.token_type" }, - { LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, "tokenizer.ggml.token_type_count" }, - { LLM_KV_TOKENIZER_SCORES, "tokenizer.ggml.scores" }, - { LLM_KV_TOKENIZER_MERGES, "tokenizer.ggml.merges" }, - { LLM_KV_TOKENIZER_BOS_ID, "tokenizer.ggml.bos_token_id" }, - { LLM_KV_TOKENIZER_EOS_ID, "tokenizer.ggml.eos_token_id" }, - { LLM_KV_TOKENIZER_UNK_ID, "tokenizer.ggml.unknown_token_id" }, - { LLM_KV_TOKENIZER_SEP_ID, "tokenizer.ggml.seperator_token_id" }, - { LLM_KV_TOKENIZER_PAD_ID, "tokenizer.ggml.padding_token_id" }, - { LLM_KV_TOKENIZER_CLS_ID, "tokenizer.ggml.cls_token_id" }, - { LLM_KV_TOKENIZER_MASK_ID, "tokenizer.ggml.mask_token_id" }, - { LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" }, - { LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" }, - { LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" }, - { LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" }, - { LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" }, - { LLM_KV_TOKENIZER_PREFIX_ID, "tokenizer.ggml.prefix_token_id" }, - { LLM_KV_TOKENIZER_SUFFIX_ID, "tokenizer.ggml.suffix_token_id" }, - { LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" }, - { LLM_KV_TOKENIZER_EOT_ID, "tokenizer.ggml.eot_token_id" }, + { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" }, + { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" }, + { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" }, + { LLM_KV_TOKENIZER_TOKEN_TYPE, "tokenizer.ggml.token_type" }, + { 
LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, "tokenizer.ggml.token_type_count" }, + { LLM_KV_TOKENIZER_SCORES, "tokenizer.ggml.scores" }, + { LLM_KV_TOKENIZER_MERGES, "tokenizer.ggml.merges" }, + { LLM_KV_TOKENIZER_BOS_ID, "tokenizer.ggml.bos_token_id" }, + { LLM_KV_TOKENIZER_EOS_ID, "tokenizer.ggml.eos_token_id" }, + { LLM_KV_TOKENIZER_UNK_ID, "tokenizer.ggml.unknown_token_id" }, + { LLM_KV_TOKENIZER_SEP_ID, "tokenizer.ggml.seperator_token_id" }, + { LLM_KV_TOKENIZER_PAD_ID, "tokenizer.ggml.padding_token_id" }, + { LLM_KV_TOKENIZER_CLS_ID, "tokenizer.ggml.cls_token_id" }, + { LLM_KV_TOKENIZER_MASK_ID, "tokenizer.ggml.mask_token_id" }, + { LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" }, + { LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" }, + { LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" }, + { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, "tokenizer.ggml.remove_extra_whitespaces" }, + { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" }, + { LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" }, + { LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" }, + { LLM_KV_TOKENIZER_PREFIX_ID, "tokenizer.ggml.prefix_token_id" }, + { LLM_KV_TOKENIZER_SUFFIX_ID, "tokenizer.ggml.suffix_token_id" }, + { LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" }, + { LLM_KV_TOKENIZER_EOT_ID, "tokenizer.ggml.eot_token_id" }, }; struct LLM_KV { @@ -504,6 +514,34 @@ enum llm_tensor { LLM_TENSOR_ATTN_KV_A_NORM, LLM_TENSOR_ATTN_SUB_NORM, LLM_TENSOR_FFN_SUB_NORM, + LLM_TENSOR_DEC_ATTN_NORM, + LLM_TENSOR_DEC_ATTN_Q, + LLM_TENSOR_DEC_ATTN_K, + LLM_TENSOR_DEC_ATTN_V, + LLM_TENSOR_DEC_ATTN_OUT, + LLM_TENSOR_DEC_ATTN_REL_B, + LLM_TENSOR_DEC_CROSS_ATTN_NORM, + LLM_TENSOR_DEC_CROSS_ATTN_Q, + LLM_TENSOR_DEC_CROSS_ATTN_K, + LLM_TENSOR_DEC_CROSS_ATTN_V, + LLM_TENSOR_DEC_CROSS_ATTN_OUT, + LLM_TENSOR_DEC_CROSS_ATTN_REL_B, + LLM_TENSOR_DEC_FFN_NORM, + LLM_TENSOR_DEC_FFN_GATE, + LLM_TENSOR_DEC_FFN_DOWN, + LLM_TENSOR_DEC_FFN_UP, + LLM_TENSOR_DEC_OUTPUT_NORM, + LLM_TENSOR_ENC_ATTN_NORM, + LLM_TENSOR_ENC_ATTN_Q, + LLM_TENSOR_ENC_ATTN_K, + LLM_TENSOR_ENC_ATTN_V, + LLM_TENSOR_ENC_ATTN_OUT, + LLM_TENSOR_ENC_ATTN_REL_B, + LLM_TENSOR_ENC_FFN_NORM, + LLM_TENSOR_ENC_FFN_GATE, + LLM_TENSOR_ENC_FFN_DOWN, + LLM_TENSOR_ENC_FFN_UP, + LLM_TENSOR_ENC_OUTPUT_NORM, }; static const std::map> LLM_TENSOR_NAMES = { @@ -1135,6 +1173,41 @@ static const std::map> LLM_TENSOR_NA { LLM_TENSOR_FFN_SUB_NORM, "blk.%d.ffn_sub_norm" }, }, }, + { + LLM_ARCH_T5, + { + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { LLM_TENSOR_OUTPUT, "output" }, + { LLM_TENSOR_DEC_OUTPUT_NORM, "dec.output_norm" }, + { LLM_TENSOR_DEC_ATTN_NORM, "dec.blk.%d.attn_norm" }, + { LLM_TENSOR_DEC_ATTN_Q, "dec.blk.%d.attn_q" }, + { LLM_TENSOR_DEC_ATTN_K, "dec.blk.%d.attn_k" }, + { LLM_TENSOR_DEC_ATTN_V, "dec.blk.%d.attn_v" }, + { LLM_TENSOR_DEC_ATTN_OUT, "dec.blk.%d.attn_o" }, + { LLM_TENSOR_DEC_ATTN_REL_B, "dec.blk.%d.attn_rel_b" }, + { LLM_TENSOR_DEC_CROSS_ATTN_NORM, "dec.blk.%d.cross_attn_norm" }, + { LLM_TENSOR_DEC_CROSS_ATTN_Q, "dec.blk.%d.cross_attn_q" }, + { LLM_TENSOR_DEC_CROSS_ATTN_K, "dec.blk.%d.cross_attn_k" }, + { LLM_TENSOR_DEC_CROSS_ATTN_V, "dec.blk.%d.cross_attn_v" }, + { LLM_TENSOR_DEC_CROSS_ATTN_OUT, "dec.blk.%d.cross_attn_o" }, + { LLM_TENSOR_DEC_CROSS_ATTN_REL_B, "dec.blk.%d.cross_attn_rel_b" }, + { LLM_TENSOR_DEC_FFN_NORM, "dec.blk.%d.ffn_norm" }, + { LLM_TENSOR_DEC_FFN_GATE, "dec.blk.%d.ffn_gate" }, + { LLM_TENSOR_DEC_FFN_DOWN, "dec.blk.%d.ffn_down" }, + { LLM_TENSOR_DEC_FFN_UP, "dec.blk.%d.ffn_up" }, + { 
LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" }, + { LLM_TENSOR_ENC_ATTN_NORM, "enc.blk.%d.attn_norm" }, + { LLM_TENSOR_ENC_ATTN_Q, "enc.blk.%d.attn_q" }, + { LLM_TENSOR_ENC_ATTN_K, "enc.blk.%d.attn_k" }, + { LLM_TENSOR_ENC_ATTN_V, "enc.blk.%d.attn_v" }, + { LLM_TENSOR_ENC_ATTN_OUT, "enc.blk.%d.attn_o" }, + { LLM_TENSOR_ENC_ATTN_REL_B, "enc.blk.%d.attn_rel_b" }, + { LLM_TENSOR_ENC_FFN_NORM, "enc.blk.%d.ffn_norm" }, + { LLM_TENSOR_ENC_FFN_GATE, "enc.blk.%d.ffn_gate" }, + { LLM_TENSOR_ENC_FFN_DOWN, "enc.blk.%d.ffn_down" }, + { LLM_TENSOR_ENC_FFN_UP, "enc.blk.%d.ffn_up" }, + }, + }, { LLM_ARCH_UNKNOWN, { @@ -2356,6 +2429,11 @@ struct llama_vocab { bool tokenizer_add_bos = false; bool tokenizer_add_eos = false; bool tokenizer_ignore_merges = false; + bool tokenizer_remove_extra_whitespaces = false; + bool tokenizer_escape_whitespaces = true; + bool tokenizer_treat_whitespace_as_suffix = false; + + std::vector precompiled_charsmap; int find_bpe_rank(const std::string & token_left, const std::string & token_right) const { GGML_ASSERT(token_left.find(' ') == std::string::npos); @@ -4191,6 +4269,7 @@ static const char * llama_model_vocab_type_name(enum llama_vocab_type type){ case LLAMA_VOCAB_TYPE_SPM: return "SPM"; case LLAMA_VOCAB_TYPE_BPE: return "BPE"; case LLAMA_VOCAB_TYPE_WPM: return "WPM"; + case LLAMA_VOCAB_TYPE_UGM: return "UGM"; default: return "unknown"; } } @@ -4870,6 +4949,45 @@ static void llm_load_vocab( vocab.special_pad_id = -1; vocab.special_cls_id = -1; vocab.special_mask_id = -1; + } else if (tokenizer_model == "t5") { + vocab.type = LLAMA_VOCAB_TYPE_UGM; + + // default special tokens + vocab.special_bos_id = -1; + vocab.special_eos_id = 1; + vocab.special_unk_id = 2; + vocab.special_sep_id = -1; + vocab.special_pad_id = 0; + vocab.special_cls_id = -1; + vocab.special_mask_id = -1; + + const int add_space_prefix_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_ADD_PREFIX).c_str()); + if (add_space_prefix_keyidx != -1) { + vocab.tokenizer_add_space_prefix = gguf_get_val_bool(ctx, add_space_prefix_keyidx); + } // The default value of add_space_prefix is true. + + const int remove_extra_whitespaces_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_REMOVE_EXTRA_WS).c_str()); + if (remove_extra_whitespaces_keyidx != -1) { + vocab.tokenizer_remove_extra_whitespaces = gguf_get_val_bool(ctx, remove_extra_whitespaces_keyidx); + } // The default value of remove_extra_whitespaces is false. 
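
As a rough, self-contained sketch of the Viterbi-style unigram search that the llm_tokenizer_ugm added further below performs (the toy vocabulary, scores and variable names here are illustrative only and are not part of this patch), the core dynamic program can be pictured like this:

    // Minimal sketch (illustrative only): Viterbi-style unigram tokenization.
    // The real tokenizer in this patch walks a token trie per UTF-8 code point
    // instead of probing every substring, and falls back to a penalized
    // unknown token when nothing in the vocabulary matches.
    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
        // toy vocabulary: token -> log-probability-like score (higher is better)
        std::map<std::string, float> vocab = {
            {"a", -1.0f}, {"b", -1.0f}, {"c", -1.0f}, {"ab", -1.5f}, {"abc", -4.0f},
        };
        std::string text = "abc";
        size_t n = text.size();

        std::vector<float>  best(n + 1, -1e30f); // best score for a tokenization of text[0..i)
        std::vector<size_t> prev(n + 1, 0);      // start offset of the last token on that best path
        best[0] = 0.0f;

        for (size_t i = 0; i < n; ++i) {
            if (best[i] <= -1e30f) continue; // position not reachable
            for (size_t len = 1; i + len <= n; ++len) {
                auto it = vocab.find(text.substr(i, len));
                if (it == vocab.end()) continue;
                float score = best[i] + it->second;
                if (score > best[i + len]) {
                    best[i + len] = score;
                    prev[i + len] = i;
                }
            }
        }

        // backtrack from the end to recover the best segmentation: "ab" + "c"
        std::vector<std::string> tokens;
        for (size_t i = n; i > 0; i = prev[i]) {
            tokens.push_back(text.substr(prev[i], i - prev[i]));
        }
        for (auto it = tokens.rbegin(); it != tokens.rend(); ++it) {
            printf("%s\n", it->c_str());
        }
        return 0;
    }

With these toy scores the segmentation "ab" + "c" (score -2.5) beats "a" + "b" + "c" (-3.0) and "abc" (-4.0), which is the kind of choice the backtracking step in the patch below makes over real vocabulary scores.
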
+ + const int precompiled_charsmap_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP).c_str()); + if (precompiled_charsmap_keyidx != -1) { + size_t n_precompiled_charsmap = gguf_get_arr_n(ctx, precompiled_charsmap_keyidx); + const char * precompiled_charsmap = (const char *) gguf_get_arr_data(ctx, precompiled_charsmap_keyidx); + vocab.precompiled_charsmap.assign(precompiled_charsmap, precompiled_charsmap + n_precompiled_charsmap); +#ifdef IS_BIG_ENDIAN + // correct endiannes of data in precompiled_charsmap binary blob + uint32_t * xcda_blob_size = (uint32_t *) &vocab.precompiled_charsmap[0]; + *xcda_blob_size = __builtin_bswap32(*xcda_blob_size); + assert(*xcda_blob_size + sizeof(uint32_t) < n_precompiled_charsmap); + size_t xcda_array_size = *xcda_blob_size / sizeof(uint32_t); + uint32_t * xcda_array = (uint32_t *) &vocab.precompiled_charsmap[sizeof(uint32_t)]; + for (size_t i = 0; i < xcda_array_size; ++i) { + xcda_array[i] = __builtin_bswap32(xcda_array[i]); + } +#endif + } } else { throw std::runtime_error(format("unknown tokenizer: '%s'", tokenizer_model.c_str())); } @@ -4952,6 +5070,10 @@ static void llm_load_vocab( vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT; vocab.tokenizer_add_bos = true; vocab.tokenizer_add_eos = false; + } else if (vocab.type == LLAMA_VOCAB_TYPE_UGM) { + vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT; + vocab.tokenizer_add_bos = false; + vocab.tokenizer_add_eos = true; } else { vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT; } @@ -13213,12 +13335,18 @@ static bool llama_is_user_defined_token(const llama_vocab& vocab, llama_token id return vocab.id_to_token[id].attr & LLAMA_TOKEN_ATTR_USER_DEFINED; } +static bool llama_is_unused_token(const llama_vocab& vocab, llama_token id) { + GGML_ASSERT(vocab.type != LLAMA_VOCAB_TYPE_NONE); + return vocab.id_to_token[id].attr & LLAMA_TOKEN_ATTR_UNUSED; +} + static uint8_t llama_token_to_byte(const llama_vocab& vocab, llama_token id) { GGML_ASSERT(llama_vocab_get_type(vocab) != LLAMA_VOCAB_TYPE_NONE); GGML_ASSERT(llama_is_byte_token(vocab, id)); const auto & token_data = vocab.id_to_token.at(id); switch (llama_vocab_get_type(vocab)) { - case LLAMA_VOCAB_TYPE_SPM: { + case LLAMA_VOCAB_TYPE_SPM: + case LLAMA_VOCAB_TYPE_UGM: { auto buf = token_data.text.substr(3, 2); return strtol(buf.c_str(), NULL, 16); } @@ -13238,7 +13366,8 @@ static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) { GGML_ASSERT(llama_vocab_get_type(vocab) != LLAMA_VOCAB_TYPE_NONE); static const char * hex = "0123456789ABCDEF"; switch (llama_vocab_get_type(vocab)) { - case LLAMA_VOCAB_TYPE_SPM: { + case LLAMA_VOCAB_TYPE_SPM: + case LLAMA_VOCAB_TYPE_UGM: { const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 }; auto token = vocab.token_to_id.find(buf); if (token != vocab.token_to_id.end()) { @@ -13826,6 +13955,383 @@ struct llm_tokenizer_wpm { const llama_vocab & vocab; }; +struct naive_trie { + naive_trie() : has_value(false), value(0) { + } + void insert(const char * key, size_t len, int32_t value = 0) { + if (len == 0) { + this->has_value = true; + this->value = value; + return; + } + char c = key[0]; + auto res = children.find(c); + if (res != children.end()) { + res->second.insert(key + 1, len - 1, value); + } else { + auto res = children.insert(std::make_pair(c, naive_trie())); + res.first->second.insert(key + 1, len - 1, value); + } + } + std::pair get_longest_prefix(const char * key, size_t len, size_t offset = 0) { + if (len == 0 || offset == len) { + return std::make_pair(key, offset); + } + 
char c = key[offset]; + auto res = children.find(c); + if (res != children.end()) { + return res->second.get_longest_prefix(key, len, offset + 1); + } else { + return std::make_pair(key, offset); + } + } + struct naive_trie * traverse(const char c) { + auto res = children.find(c); + if (res != children.end()) { + return &res->second; + } else { + return NULL; + } + } + std::map children; + bool has_value; + llama_token value; +}; + +struct llm_tokenizer_ugm { + llm_tokenizer_ugm(const llama_vocab & vocab) : vocab(vocab) { + if (vocab.precompiled_charsmap.size() > 0) { + size_t charsmap_offset = 0; + + // First four bytes of precompiled_charsmap contains length of binary + // blob containing XOR-compressed compact double array (XCDA) entries + uint32_t xcda_blob_size = *(const uint32_t *) &vocab.precompiled_charsmap[0]; + charsmap_offset += sizeof(xcda_blob_size); + if (xcda_blob_size + charsmap_offset >= vocab.precompiled_charsmap.size()) { + throw std::runtime_error("Index out of array bounds in precompiled charsmap!"); + } + + // Next xcda_blob_size bytes contain entries of XOR-compressed compact + // double array (XCDA). Each entry is bit-packed into a 32-bit integer. + xcda_array = (const uint32_t *) &vocab.precompiled_charsmap[charsmap_offset]; + xcda_array_size = xcda_blob_size / sizeof(uint32_t); + charsmap_offset += xcda_blob_size; + + // Remaining bytes of precompiled charsmap contain null-terminated + // replacement strings for prefixes matched by the XCDA. + prefix_replacements = &vocab.precompiled_charsmap[charsmap_offset]; + prefix_replacements_size = vocab.precompiled_charsmap.size() - charsmap_offset; + } + + for (unsigned int id = 0; id < vocab.id_to_token.size(); ++id) { + const auto &token_data = vocab.id_to_token[id]; + + if (llama_is_normal_token(vocab, id)) { + min_score = std::min(min_score, token_data.score); + max_score = std::max(max_score, token_data.score); + } + + if (llama_is_normal_token(vocab, id) || + llama_is_user_defined_token(vocab, id) || + llama_is_unused_token(vocab, id)) { + token_matcher.insert(token_data.text.data(), token_data.text.size(), id); + } + + if (llama_is_user_defined_token(vocab, id)) { + user_defined_token_matcher.insert(token_data.text.data(), token_data.text.size()); + } + } + + unknown_token_score = min_score - unknown_token_score_penalty; + } + + /* This implementation is based on SentencePiece optimized Viterbi algorithm for + * unigram language models. The general idea is to: + * - move along the input sequence in steps of one UTF code point, + * - at each step find all possible tokenizations of the prefix by + * traversing the tokens trie, + * - for each tokenization store the best one so far (by higher score) + * - use the position in sequence after given token as an index to store + * results + * - if there was no valid tokenization of the current UTF code point + * then use unknown token with additional score penalty + * After processing the whole sequence we backtrack from the end to get + * the best tokenization. 
+ */ + void tokenize(const std::string & text, std::vector & output) { + // normalize the input first + std::string normalized; + normalize(text, &normalized); + size_t input_len = normalized.size(); + + // initialize score_sum to -FLT_MAX so it will be always lower than sums of token scores + std::vector tokenization_results(input_len + 1, {0, 0, -FLT_MAX}); + // at the beginning tokenization score is zero + tokenization_results[0] = { 0, 0, 0 }; + + for (size_t input_offset = 0; input_offset < input_len;) { + size_t prefix_offset = input_offset; + // calculate how many code units are in the currently processed UTF code point + size_t n_utf8_code_units = std::min(utf8_len(normalized[input_offset]), input_len - input_offset); + + // traverse the token matcher trie to find a matching token + bool single_codepoint_token_found = false; + const struct best_tokenization & current_best = tokenization_results[input_offset]; + struct naive_trie * node = token_matcher.traverse(normalized[prefix_offset++]); + + while (prefix_offset <= input_len && node != NULL) { + // check if we found valid token in prefix + if (node->has_value) { + // check if it corresponds to the whole UTF code point + if (prefix_offset - input_offset == n_utf8_code_units) { + single_codepoint_token_found = true; + } + llama_token token_id = node->value; + const auto &token_data = vocab.id_to_token[token_id]; + + // we set the user-defined token scores to 0 to make them more likely to be selected + // (normal token scores are log probabilities, so they are negative) + // score type is double here to make tokenization results exactly + // the same as in the HF tokenizer using SentencePiece + const double token_score = llama_is_user_defined_token(vocab, token_id) ? 0.0 : token_data.score; + const double challenger_score = current_best.score_sum + token_score; + struct best_tokenization & current_champ = tokenization_results[prefix_offset]; + if (challenger_score > current_champ.score_sum) { + struct best_tokenization challenger = { token_id, input_offset, (float) challenger_score }; + current_champ = challenger; + } + } + node = node->traverse(normalized[prefix_offset++]); + } + + // if we didn't find a valid token corresponding to the whole UTF code point + // then use unknown token as the tokenization of this UTF code point + if (!single_codepoint_token_found) { + const double challenger_score = current_best.score_sum + unknown_token_score; + prefix_offset = input_offset + n_utf8_code_units; + struct best_tokenization & current_champ = tokenization_results[prefix_offset]; + if (challenger_score > current_champ.score_sum) { + struct best_tokenization challenger = { vocab.special_unk_id, input_offset, (float) challenger_score }; + current_champ = challenger; + } + } + + // move to the next UTF code point + input_offset += n_utf8_code_units; + } + + // now backtrack from the end to gather token ids of the best tokenization + // merge sequences of consecutive unknown tokens into single unknown tokens + bool is_prev_unknown = false; + for (struct best_tokenization & tokenization = tokenization_results[input_len]; ; tokenization = tokenization_results[tokenization.input_offset]) { + bool is_unknown = tokenization.token_id == vocab.special_unk_id; + if (!(is_prev_unknown && is_unknown)) { + output.push_back(tokenization.token_id); + } + if (tokenization.input_offset == 0) { + break; + } + is_prev_unknown = is_unknown; + } + + // reverse the output since we added tokens starting from the end of the input + std::reverse(output.begin(), 
output.end()); + } + +private: + const llama_vocab & vocab; + + // helper structure for returning normalization results + struct normalization_result { + const char * normalized; + size_t normalized_len; + size_t consumed_input; + }; + + void normalize(const std::string& input, std::string * normalized) { + normalized->clear(); + normalized->reserve(input.size() * 3); + + const std::string space = vocab.tokenizer_escape_whitespaces ? escaped_space : " "; + + bool shall_prepend_space = !vocab.tokenizer_treat_whitespace_as_suffix && vocab.tokenizer_add_space_prefix; + bool shall_append_space = vocab.tokenizer_treat_whitespace_as_suffix && vocab.tokenizer_add_space_prefix; + bool shall_merge_spaces = vocab.tokenizer_remove_extra_whitespaces; + + bool is_space_prepended = false; + bool processing_non_ws = false; + + size_t input_len = input.size(); + + for (size_t input_offset = 0; input_offset < input_len; ) { + auto norm_res = normalize_prefix(input, input_offset); + for (size_t i = 0; i < norm_res.normalized_len; i++) { + char c = norm_res.normalized[i]; + if (c != ' ') { + if (!processing_non_ws) { + processing_non_ws = true; + if ((shall_prepend_space && !is_space_prepended) || shall_merge_spaces) { + normalized->append(space); + is_space_prepended = true; + } + } + normalized->push_back(c); + } else { + if (processing_non_ws) { + processing_non_ws = false; + } + if (!shall_merge_spaces) { + normalized->append(space); + } + } + } + + input_offset += norm_res.consumed_input; + } + + if (shall_append_space) { + normalized->append(space); + } + } + + /* + * This structure is a view wrapper for XOR-compressed double array (XCDA) + * See Shunsuke Kanda (2018). Space- and Time-Efficient String Dictionaries. + * Eeach bit-packed entry contains: + * - BASE array value in bits 10-30 + * - LCHECK array value in bits 0-7 + * - LEAF array value in bit 9 + * Entries containing indexes of replacement sequences have set bit 31 + */ + struct xcda_array_view { + public: + xcda_array_view(const uint32_t * xcda_array, size_t xcda_array_size) : xcda_array(xcda_array), xcda_array_size(xcda_array_size) { + } + uint32_t get_base(size_t index) { + uint32_t packed_node = get_node(index); + return (packed_node >> 10) << ((packed_node & (1U << 9)) >> 6); + } + uint32_t get_lcheck(size_t index) { + uint32_t packed_node = get_node(index); + return packed_node & ((1U << 31) | 0xff); + } + bool get_leaf(size_t index) { + uint32_t packed_node = get_node(index); + return (packed_node >> 8) & 1; + } + uint32_t get_value(size_t index) { + uint32_t packed_node = get_node(index); + return packed_node & ((1U << 31) - 1); + } + private: + uint32_t get_node(size_t index) { + if (index > xcda_array_size) { + throw std::runtime_error("Index out of array bounds in XCDA array!"); + } + return xcda_array[index]; + } + const uint32_t * xcda_array; + size_t xcda_array_size; + }; + + struct normalization_result normalize_prefix(const std::string & input, size_t input_offset) { + if (input_offset == input.size()) { + return { &input[input_offset], 0, 0 }; + } + + // if input prefix matches some user-defined token return this token as normalization result + auto user_defined_token_match = user_defined_token_matcher.get_longest_prefix(&input[input_offset], input.size() - input_offset); + if (user_defined_token_match.second > 0) { + return { &input[input_offset], user_defined_token_match.second, user_defined_token_match.second }; + } + + size_t longest_prefix_length = 0; + size_t longest_prefix_offset = 0; + + if (xcda_array_size > 0) { + 
struct xcda_array_view xcda_view(xcda_array, xcda_array_size); + + // Find the longest normalized sequence matching the input prefix by walking + // the XOR-compressed compact double array (XCDA) starting from the root node + // We find the index of the next node by calculating BASE[s] ^ c where s is + // the index of the previous node and c is a numerical character value + uint32_t node_index = 0; + // get BASE of the root node + node_index = xcda_view.get_base(node_index); + for (size_t prefix_offset = input_offset; prefix_offset < input.size(); prefix_offset++) { + unsigned char c = input[prefix_offset]; + if (c == 0) { + break; + } + node_index ^= c; + // if value of LCHECK is not c it means that this is not a child of + // the previous node, so we stop matching + if (xcda_view.get_lcheck(node_index) != c) { + break; + } + bool is_leaf = xcda_view.get_leaf(node_index); + // get BASE of the current node + node_index ^= xcda_view.get_base(node_index); + // if LEAF of the current node is true, it means that its BASE points to the node + // containing index of replacement sequence for currently matched input prefix + if (is_leaf) + { + longest_prefix_length = prefix_offset - input_offset + 1; + // get index of replacement sequence for currently matched input prefix + longest_prefix_offset = xcda_view.get_value(node_index); + } + } + } + + if (longest_prefix_length > 0) { + // we have a match, so return the replacement sequence + if (longest_prefix_offset >= prefix_replacements_size) { + throw std::runtime_error("Index out of array bounds in precompiled charsmap!"); + } + const char * prefix_replacement = &prefix_replacements[longest_prefix_offset]; + return { prefix_replacement, strlen(prefix_replacement), longest_prefix_length }; + } else { + // check if the input prefix contains a valid sequence of UTF-8 code units + try { + // if yes, return this sequence unmodified + size_t prefix_offset = input_offset; + unicode_cpt_from_utf8(input, prefix_offset); + return { &input[input_offset], prefix_offset - input_offset, prefix_offset - input_offset }; + } catch(std::invalid_argument & ex) { + // if no, consume 1 byte and return U+FFFD - REPLACEMENT CHARACTER + return { "\xEF\xBF\xBD", 3, 1 }; + } + } + } + + // escaped space symbol - U+2581 (Lower One Eighth Block) + const std::string escaped_space = "\xE2\x96\x81"; + + const char * prefix_replacements = NULL; + size_t prefix_replacements_size = 0; + + const uint32_t * xcda_array = NULL; + size_t xcda_array_size = 0; + + struct naive_trie user_defined_token_matcher; + + // this structure stores the best tokenization so far at input_offset + struct best_tokenization { + llama_token token_id; + size_t input_offset; + float score_sum; + }; + + float min_score = FLT_MAX; + float max_score = -FLT_MAX; + + float unknown_token_score_penalty = 10.0; + float unknown_token_score; + + struct naive_trie token_matcher; +}; + + typedef enum FRAGMENT_BUFFER_VARIANT_TYPE { FRAGMENT_BUFFER_VARIANT_TYPE_TOKEN, FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT @@ -14086,6 +14592,39 @@ static std::vector llama_tokenize_internal(const llama_vocab & output.push_back(vocab.special_sep_id); } } break; + case LLAMA_VOCAB_TYPE_UGM: + { + llm_tokenizer_ugm tokenizer(vocab); + + if (add_special && vocab.tokenizer_add_bos != 0) { + GGML_ASSERT(vocab.special_bos_id != -1); + output.push_back(vocab.special_bos_id); + } + + for (const auto & fragment : fragment_buffer) { + if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) { + auto raw_text = 
fragment.raw_text.substr(fragment.offset, fragment.length); +#ifdef PRETOKENIZERDEBUG + LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str()); +#endif + tokenizer.tokenize(raw_text, output); + } else { // if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_TOKEN) + output.push_back(fragment.token); + } + } + + if (add_special && vocab.tokenizer_add_bos != 0 && output.size() >= 2 && output[1] == vocab.special_bos_id) { + LLAMA_LOG_WARN( + "%s: Added a BOS token to the prompt as specified by the model but the prompt " + "also starts with a BOS token. So now the final prompt starts with 2 BOS tokens. " + "Are you sure this is what you want?\n", __FUNCTION__); + } + + if (add_special && vocab.tokenizer_add_eos == 1) { + GGML_ASSERT(vocab.special_eos_id != -1); + output.push_back(vocab.special_eos_id); + } + } break; case LLAMA_VOCAB_TYPE_NONE: GGML_ASSERT(false); } @@ -16964,6 +17503,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) { case LLM_ARCH_BLOOM: case LLM_ARCH_MAMBA: case LLM_ARCH_JINA_BERT_V2: + case LLM_ARCH_T5: return LLAMA_ROPE_TYPE_NONE; // use what we call a normal RoPE, operating on pairs of consecutive head values @@ -18659,6 +19199,10 @@ llama_token llama_token_eot(const struct llama_model * model) { return model->vocab.special_eot_id; } +llama_token llama_token_pad(const struct llama_model * model) { + return model->vocab.special_pad_id; +} + int32_t llama_tokenize( const struct llama_model * model, const char * text, @@ -18725,7 +19269,8 @@ int32_t llama_token_to_piece(const struct llama_model * model, llama_token token if (0 <= token && token < llama_n_vocab(model)) { switch (llama_vocab_get_type(model->vocab)) { case LLAMA_VOCAB_TYPE_WPM: - case LLAMA_VOCAB_TYPE_SPM: { + case LLAMA_VOCAB_TYPE_SPM: + case LLAMA_VOCAB_TYPE_UGM: { // NOTE: we accept all unsupported token types, // suppressing them like CONTROL tokens. if (llama_is_normal_token(model->vocab, token)) { diff --git a/llama.h b/llama.h index 82d15747f4662..88eecb0edb17e 100644 --- a/llama.h +++ b/llama.h @@ -67,6 +67,7 @@ extern "C" { LLAMA_VOCAB_TYPE_SPM = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback LLAMA_VOCAB_TYPE_BPE = 2, // GPT-2 tokenizer based on byte-level BPE LLAMA_VOCAB_TYPE_WPM = 3, // BERT tokenizer based on WordPiece + LLAMA_VOCAB_TYPE_UGM = 4, // T5 tokenizer based on Unigram }; // pre-tokenization types @@ -857,6 +858,7 @@ extern "C" { LLAMA_API llama_token llama_token_cls(const struct llama_model * model); // classification LLAMA_API llama_token llama_token_sep(const struct llama_model * model); // sentence separator LLAMA_API llama_token llama_token_nl (const struct llama_model * model); // next-line + LLAMA_API llama_token llama_token_pad(const struct llama_model * model); // padding // Returns -1 if unknown, 1 for true or 0 for false. 
LLAMA_API int32_t llama_add_bos_token(const struct llama_model * model); diff --git a/unicode.cpp b/unicode.cpp index c0b76bf20aede..8692924b957cc 100644 --- a/unicode.cpp +++ b/unicode.cpp @@ -23,7 +23,7 @@ static std::string unicode_cpts_to_utf8(const std::vector & cps) { return result; } -static uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) { +uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) { assert(offset < utf8.size()); if (!(utf8[offset + 0] & 0x80)) { auto result = utf8[offset + 0]; diff --git a/unicode.h b/unicode.h index 6c488970a79d6..30b07ba7fa493 100644 --- a/unicode.h +++ b/unicode.h @@ -48,6 +48,7 @@ struct codepoint_flags { std::string unicode_cpt_to_utf8(uint32_t cp); +uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset); std::vector unicode_cpts_from_utf8(const std::string & utf8); std::vector unicode_cpts_normalize_nfd(const std::vector & cpts); From 163d50adaf8897d8b734d701ff332de6be63d484 Mon Sep 17 00:00:00 2001 From: jukofyork <69222624+jukofyork@users.noreply.github.com> Date: Tue, 25 Jun 2024 21:47:40 +0100 Subject: [PATCH 13/15] fixes #7999 (adds control vectors to all `build_XXX()` functions in `llama.cpp` [needs testing] (#8060) * fixes #7999 The `build_command_r` forgot to add the control vector. * Fixes qwen2 too * Fixed all models' control vectors * Removed double calls to `cb(cur, "l_out", il)` * Moved control vector logic to llama_control_vector:apply_to() --- llama.cpp | 112 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 73 insertions(+), 39 deletions(-) diff --git a/llama.cpp b/llama.cpp index 78a21008f9338..989c731495dbb 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2368,13 +2368,21 @@ struct llama_control_vector { int32_t layer_start = -1; int32_t layer_end = -1; - ggml_tensor * tensor_for(int il) const { + struct ggml_tensor * tensor_for(int il) const { if (il < 0 || il < layer_start || il > layer_end || (size_t) il >= tensors.size()) { return nullptr; } return tensors[il]; } + struct ggml_tensor * apply_to(struct ggml_context * ctx, struct ggml_tensor * cur, int il) const { + ggml_tensor * layer_dir = tensor_for(il); + if (layer_dir != nullptr) { + cur = ggml_add(ctx, cur, layer_dir); + } + return cur; + } + ~llama_control_vector() { for (struct ggml_context * ctx : ctxs) { ggml_free(ctx); @@ -8023,10 +8031,7 @@ struct llm_build_context { cur = ggml_add(ctx0, cur, ffn_inp); cb(cur, "ffn_out", il); - ggml_tensor * layer_dir = lctx.cvec.tensor_for(il); - if (layer_dir != nullptr) { - cur = ggml_add(ctx0, cur, layer_dir); - } + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -8141,6 +8146,7 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -8245,6 +8251,7 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -8360,9 +8367,8 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_inp); - cb(cur, "l_out", il); - cur = ggml_add(ctx0, cur, inpL); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -8514,10 +8520,7 @@ struct llm_build_context { cur = ggml_add(ctx0, cur, ffn_inp); cb(cur, "ffn_out", il); - ggml_tensor * layer_dir = lctx.cvec.tensor_for(il); - if (layer_dir != nullptr) { - cur = ggml_add(ctx0, cur, layer_dir); - } + cur = lctx.cvec.apply_to(ctx0, cur, 
il); cb(cur, "l_out", il); // input for next layer @@ -8648,10 +8651,7 @@ struct llm_build_context { cur = ggml_add(ctx0, cur, ffn_inp); cb(cur, "ffn_out", il); - ggml_tensor * layer_dir = lctx.cvec.tensor_for(il); - if (layer_dir != nullptr) { - cur = ggml_add(ctx0, cur, layer_dir); - } + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -8757,8 +8757,12 @@ struct llm_build_context { cb(cur, "ffn_out", il); } - inpL = ggml_add(ctx0, cur, ffn_inp); - cb(inpL, "l_out", il); + cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; } cur = llm_build_norm(ctx0, inpL, hparams, @@ -8846,6 +8850,7 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -9141,8 +9146,12 @@ struct llm_build_context { cb(cur, "ffn_out", il); } - inpL = ggml_add(ctx0, cur, ffn_inp); - cb(inpL, "l_out", il); + cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; } cur = llm_build_norm(ctx0, inpL, hparams, @@ -9276,6 +9285,7 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -9424,6 +9434,7 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -9536,6 +9547,7 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -9647,6 +9659,7 @@ struct llm_build_context { cb(cur, "ffn_out", il); cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -9792,6 +9805,7 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -9912,11 +9926,11 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_output); - cb(cur, "l_out", il); - cur = ggml_add(ctx0, cur, inpL); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); + // input for next layer inpL = cur; } @@ -10048,8 +10062,10 @@ struct llm_build_context { } cur = ggml_add(ctx0, residual, cur); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); + // input for next layer inpL = cur; } @@ -10148,9 +10164,8 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, sa_out); - cb(cur, "l_out", il); - cur = ggml_add(ctx0, cur, inpL); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -10256,8 +10271,12 @@ struct llm_build_context { cb(cur, "ffn_out", il); } - inpL = ggml_add(ctx0, cur, ffn_inp); - cb(inpL, "l_out", il); + cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; } cur = llm_build_norm(ctx0, inpL, hparams, @@ -10363,8 +10382,12 @@ struct llm_build_context { cb(cur, "ffn_out", il); } - inpL = ggml_add(ctx0, cur, ffn_inp); - cb(inpL, "l_out", il); + cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; } cur = llm_build_norm(ctx0, inpL, hparams, @@ -10476,6 +10499,7 @@ struct llm_build_context { cb(cur, "ffn_out", il); cur = 
ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -10593,6 +10617,7 @@ struct llm_build_context { cb(cur, "ffn_out", il); cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -10734,6 +10759,7 @@ struct llm_build_context { cb(cur, "hidden_scaled_ffn", -1); cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -10846,6 +10872,7 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, sa_out); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -10962,7 +10989,9 @@ struct llm_build_context { NULL, LLM_FFN_GELU, LLM_FFN_SEQ, cb, il); cb(cur, "ffn_out", il); + cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -11111,6 +11140,7 @@ struct llm_build_context { // residual cur = ggml_add(ctx0, cur, inpL); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -11252,6 +11282,7 @@ struct llm_build_context { // add together residual + FFN + self-attention cur = ggml_add(ctx0, cur, inpL); cur = ggml_add(ctx0, cur, attn_out); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -11387,10 +11418,7 @@ struct llm_build_context { cur = ggml_add(ctx0, cur, ffn_inp); cb(cur, "ffn_out", il); - ggml_tensor * layer_dir = lctx.cvec.tensor_for(il); - if (layer_dir != nullptr) { - cur = ggml_add(ctx0, cur, layer_dir); - } + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -11504,8 +11532,12 @@ struct llm_build_context { cur = ggml_add(ctx0, cur, inpL); cb(cur, "ffn_out", il); - inpL = ggml_add(ctx0, cur, attn_out); - cb(inpL, "l_out", il); + cur = ggml_add(ctx0, cur, attn_out); + cur = lctx.cvec.apply_to(ctx0, cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; } else { // attention and ffn are computed sequentially // x = x + attn(ln1(x)) @@ -11528,8 +11560,12 @@ struct llm_build_context { LLM_FFN_GELU, LLM_FFN_SEQ, cb, il); cb(cur, "ffn_out", il); - inpL = ggml_add(ctx0, cur, ffn_inp); - cb(inpL, "l_out", il); + cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; } } @@ -11656,10 +11692,7 @@ struct llm_build_context { cur = ggml_add(ctx0, cur, ffn_out); cb(cur, "ffn_out", il); - ggml_tensor * layer_dir = lctx.cvec.tensor_for(il); - if (layer_dir != nullptr) { - cur = ggml_add(ctx0, cur, layer_dir); - } + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -11892,6 +11925,7 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer From 6777c544bdd8c5d9de3220d6e2557957bbbf2a4f Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Wed, 26 Jun 2024 01:45:58 +0100 Subject: [PATCH 14/15] `json`: fix additionalProperties, allow space after enum/const (#7840) * json: default additionalProperty to true * json: don't force additional props after normal properties! 
* json: allow space after enum/const * json: update pydantic example to set additionalProperties: false * json: prevent additional props to redefine a typed prop * port not_strings to python, add trailing space * fix not_strings & port to js+py * Update json-schema-to-grammar.cpp * fix _not_strings for substring overlaps * json: fix additionalProperties default, uncomment tests * json: add integ. test case for additionalProperties * json: nit: simplify condition * reformat grammar integ tests w/ R"""()""" strings where there's escapes * update # tokens in server test: consts can now have trailing space --- common/json-schema-to-grammar.cpp | 99 +++++- examples/json-schema-pydantic-example.py | 6 +- examples/json_schema_to_grammar.py | 76 ++++- .../server/public/json-schema-to-grammar.mjs | 89 ++++- examples/server/tests/features/server.feature | 2 +- tests/test-grammar-integration.cpp | 320 ++++++++---------- tests/test-json-schema-to-grammar.cpp | 150 ++++++-- 7 files changed, 497 insertions(+), 245 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 07d0e952d74cf..b40821dadc05e 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -614,6 +614,75 @@ class SchemaConverter { return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space"); } + /* + Returns a rule that matches a JSON string that is none of the provided strings + + not_strings({"a"}) + -> ["] ( [a] char+ | [^"a] char* )? ["] space + not_strings({"and", "also"}) + -> ["] ( [a] ([l] ([s] ([o] char+ | [^"o] char*) | [^"s] char*) | [n] ([d] char+ | [^"d] char*) | [^"ln] char*) | [^"a] char* )? ["] space + */ + std::string _not_strings(const std::vector & strings) { + + struct TrieNode { + std::map children; + bool is_end_of_string; + + TrieNode() : is_end_of_string(false) {} + + void insert(const std::string & string) { + auto node = this; + for (char c : string) { + node = &node->children[c]; + } + node->is_end_of_string = true; + } + }; + + TrieNode trie; + for (const auto & s : strings) { + trie.insert(s); + } + + std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char")); + std::ostringstream out; + out << "[\"] ( "; + std::function visit = [&](const TrieNode & node) { + std::ostringstream rejects; + auto first = true; + for (const auto & kv : node.children) { + rejects << kv.first; + if (first) { + first = false; + } else { + out << " | "; + } + out << "[" << kv.first << "]"; + if (!kv.second.children.empty()) { + out << " ("; + visit(kv.second); + out << ")"; + } else if (kv.second.is_end_of_string) { + out << " " << char_rule << "+"; + } + } + if (!node.children.empty()) { + if (!first) { + out << " | "; + } + out << "[^\"" << rejects.str() << "] " << char_rule << "*"; + } + }; + visit(trie); + + out << " )"; + if (!trie.is_end_of_string) { + out << "?"; + } + out << " [\"] space"; + return out.str(); + } + std::string _resolve_ref(const std::string & ref) { std::string ref_name = ref.substr(ref.find_last_of('/') + 1); if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) { @@ -634,6 +703,7 @@ class SchemaConverter { std::vector required_props; std::vector optional_props; std::unordered_map prop_kv_rule_names; + std::vector prop_names; for (const auto & kv : properties) { const auto &prop_name = kv.first; const auto &prop_schema = kv.second; @@ -648,11 +718,18 @@ class SchemaConverter { } else { optional_props.push_back(prop_name); } + 
prop_names.push_back(prop_name); } - if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get())) { + if (!(additional_properties.is_boolean() && !additional_properties.get())) { std::string sub_name = name + (name.empty() ? "" : "-") + "additional"; - std::string value_rule = visit(additional_properties.is_object() ? additional_properties : json::object(), sub_name + "-value"); - std::string kv_rule = _add_rule(sub_name + "-kv", _add_primitive("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule); + std::string value_rule = + additional_properties.is_object() ? visit(additional_properties, sub_name + "-value") + : _add_primitive("value", PRIMITIVE_RULES.at("value")); + + auto key_rule = + prop_names.empty() ? _add_primitive("string", PRIMITIVE_RULES.at("string")) + : _add_rule(sub_name + "-k", _not_strings(prop_names)); + std::string kv_rule = _add_rule(sub_name + "-kv", key_rule + " \":\" space " + value_rule); prop_kv_rule_names["*"] = kv_rule; optional_props.push_back("*"); } @@ -678,15 +755,11 @@ class SchemaConverter { } std::string k = ks[0]; std::string kv_rule_name = prop_kv_rule_names[k]; - if (k == "*") { - res = _add_rule( - name + (name.empty() ? "" : "-") + "additional-kvs", - kv_rule_name + " ( \",\" space " + kv_rule_name + " )*" - ); - } else if (first_is_optional) { - res = "( \",\" space " + kv_rule_name + " )?"; + std::string comma_ref = "( \",\" space " + kv_rule_name + " )"; + if (first_is_optional) { + res = comma_ref + (k == "*" ? "*" : "?"); } else { - res = kv_rule_name; + res = kv_rule_name + (k == "*" ? " " + comma_ref + "*" : ""); } if (ks.size() > 1) { res += " " + _add_rule( @@ -824,13 +897,13 @@ class SchemaConverter { } return _add_rule(rule_name, _generate_union_rule(name, schema_types)); } else if (schema.contains("const")) { - return _add_rule(rule_name, _generate_constant_rule(schema["const"])); + return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space"); } else if (schema.contains("enum")) { std::vector enum_values; for (const auto & v : schema["enum"]) { enum_values.push_back(_generate_constant_rule(v)); } - return _add_rule(rule_name, join(enum_values.begin(), enum_values.end(), " | ")); + return _add_rule(rule_name, "(" + join(enum_values.begin(), enum_values.end(), " | ") + ") space"); } else if ((schema_type.is_null() || schema_type == "object") && (schema.contains("properties") || (schema.contains("additionalProperties") && schema["additionalProperties"] != true))) { diff --git a/examples/json-schema-pydantic-example.py b/examples/json-schema-pydantic-example.py index 2240188cd031e..2a24f81189fb1 100644 --- a/examples/json-schema-pydantic-example.py +++ b/examples/json-schema-pydantic-example.py @@ -3,7 +3,7 @@ #! pip install pydantic #! 
python json-schema-pydantic-example.py -from pydantic import BaseModel, TypeAdapter +from pydantic import BaseModel, Extra, TypeAdapter from annotated_types import MinLen from typing import Annotated, List, Optional import json, requests @@ -50,12 +50,16 @@ def create_completion(*, response_model=None, endpoint="http://localhost:8080/v1 if __name__ == '__main__': class QAPair(BaseModel): + class Config: + extra = 'forbid' # triggers additionalProperties: false in the JSON schema question: str concise_answer: str justification: str stars: Annotated[int, Field(ge=1, le=5)] class PyramidalSummary(BaseModel): + class Config: + extra = 'forbid' # triggers additionalProperties: false in the JSON schema title: str summary: str question_answers: Annotated[List[QAPair], MinLen(2)] diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index 86500a8c3c238..3f3132f88a824 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -4,8 +4,7 @@ import json import re import sys -from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union - +from typing import Any, List, Optional, Set, Tuple, Union def _build_repetition(item_rule, min_items, max_items, separator_rule=None): @@ -276,6 +275,51 @@ def recurse(i: int): return ''.join(('(', *recurse(0), ')')) + def _not_strings(self, strings): + class TrieNode: + def __init__(self): + self.children = {} + self.is_end_of_string = False + + def insert(self, string): + node = self + for c in string: + node = node.children.setdefault(c, TrieNode()) + node.is_end_of_string = True + + trie = TrieNode() + for s in strings: + trie.insert(s) + + char_rule = self._add_primitive('char', PRIMITIVE_RULES['char']) + out = ['["] ( '] + + def visit(node): + rejects = [] + first = True + for c in sorted(node.children.keys()): + child = node.children[c] + rejects.append(c) + if first: + first = False + else: + out.append(' | ') + out.append(f'[{c}]') + if child.children: + out.append(f' (') + visit(child) + out.append(')') + elif child.is_end_of_string: + out.append(f' {char_rule}+') + if node.children: + if not first: + out.append(' | ') + out.append(f'[^"{"".join(rejects)}] {char_rule}*') + visit(trie) + + out.append(f' ){"" if trie.is_end_of_string else "?"} ["] space') + return ''.join(out) + def _add_rule(self, name, rule): esc_name = INVALID_RULE_CHARS_RE.sub('-', name) if esc_name not in self._rules or self._rules[esc_name] == rule: @@ -524,10 +568,10 @@ def visit(self, schema, name): return self._add_rule(rule_name, self._generate_union_rule(name, [{'type': t} for t in schema_type])) elif 'const' in schema: - return self._add_rule(rule_name, self._generate_constant_rule(schema['const'])) + return self._add_rule(rule_name, self._generate_constant_rule(schema['const']) + ' space') elif 'enum' in schema: - rule = ' | '.join((self._generate_constant_rule(v) for v in schema['enum'])) + rule = '(' + ' | '.join((self._generate_constant_rule(v) for v in schema['enum'])) + ') space' return self._add_rule(rule_name, rule) elif schema_type in (None, 'object') and \ @@ -632,7 +676,7 @@ def _add_primitive(self, name: str, rule: BuiltinRule): self._add_primitive(dep, dep_rule) return n - def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]): + def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Optional[Union[bool, Any]]): prop_order = self._prop_order # sort by 
position in prop_order (if specified) then by original order sorted_props = [kv[0] for _, kv in sorted(enumerate(properties), key=lambda ikv: (prop_order.get(ikv[1][0], len(prop_order)), ikv[0]))] @@ -647,12 +691,16 @@ def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[st required_props = [k for k in sorted_props if k in required] optional_props = [k for k in sorted_props if k not in required] - if additional_properties == True or isinstance(additional_properties, dict): + if additional_properties != False: sub_name = f'{name}{"-" if name else ""}additional' - value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value') + value_rule = self.visit(additional_properties, f'{sub_name}-value') if isinstance(additional_properties, dict) else \ + self._add_primitive('value', PRIMITIVE_RULES['value']) + key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not sorted_props \ + else self._add_rule(f'{sub_name}-k', self._not_strings(sorted_props)) + prop_kv_rule_names["*"] = self._add_rule( f'{sub_name}-kv', - self._add_primitive('string', PRIMITIVE_RULES['string']) + f' ":" space {value_rule}' + f'{key_rule} ":" space {value_rule}' ) optional_props.append("*") @@ -667,15 +715,11 @@ def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[st def get_recursive_refs(ks, first_is_optional): [k, *rest] = ks kv_rule_name = prop_kv_rule_names[k] - if k == '*': - res = self._add_rule( - f'{name}{"-" if name else ""}additional-kvs', - f'{kv_rule_name} ( "," space ' + kv_rule_name + ' )*' - ) - elif first_is_optional: - res = f'( "," space {kv_rule_name} )?' + comma_ref = f'( "," space {kv_rule_name} )' + if first_is_optional: + res = comma_ref + ('*' if k == '*' else '?') else: - res = kv_rule_name + res = kv_rule_name + (' ' + comma_ref + "*" if k == '*' else '') if len(rest) > 0: res += ' ' + self._add_rule( f'{name}{"-" if name else ""}{k}-rest', diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs index f340f94bd75bc..02015bbd49015 100644 --- a/examples/server/public/json-schema-to-grammar.mjs +++ b/examples/server/public/json-schema-to-grammar.mjs @@ -532,6 +532,64 @@ export class SchemaConverter { return this._addRule(name, "\"\\\"\" " + toRule(transform()) + " \"\\\"\" space") } + _notStrings(strings) { + class TrieNode { + constructor() { + this.children = {}; + this.isEndOfString = false; + } + + insert(str) { + let node = this; + for (const c of str) { + node = node.children[c] = node.children[c] || new TrieNode(); + } + node.isEndOfString = true; + } + } + + const trie = new TrieNode(); + for (const s of strings) { + trie.insert(s); + } + + const charRuleName = this._addPrimitive('char', PRIMITIVE_RULES['char']); + const out = ['["] ( ']; + + const visit = (node) => { + const rejects = []; + let first = true; + for (const c of Object.keys(node.children).sort()) { + const child = node.children[c]; + rejects.push(c); + if (first) { + first = false; + } else { + out.push(' | '); + } + out.push(`[${c}]`); + if (Object.keys(child.children).length > 0) { + out.push(' ('); + visit(child); + out.push(')'); + } else if (child.isEndOfString) { + out.push(` ${charRuleName}+`); + } + } + if (Object.keys(node.children).length > 0) { + if (!first) { + out.push(' | '); + } + out.push(`[^"${rejects.join('')}] ${charRuleName}*`); + } + }; + + visit(trie); + + out.push(` )${trie.isEndOfString ? 
'' : '?'} ["] space`); + return out.join(''); + } + _resolveRef(ref) { let refName = ref.split('/').pop(); if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) { @@ -560,9 +618,9 @@ export class SchemaConverter { } else if (Array.isArray(schemaType)) { return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({ type: t })))); } else if ('const' in schema) { - return this._addRule(ruleName, this._generateConstantRule(schema.const)); + return this._addRule(ruleName, this._generateConstantRule(schema.const) + ' space'); } else if ('enum' in schema) { - const rule = schema.enum.map(v => this._generateConstantRule(v)).join(' | '); + const rule = '(' + schema.enum.map(v => this._generateConstantRule(v)).join(' | ') + ') space'; return this._addRule(ruleName, rule); } else if ((schemaType === undefined || schemaType === 'object') && ('properties' in schema || @@ -599,7 +657,7 @@ export class SchemaConverter { } } - return this._addRule(ruleName, this._buildObjectRule(properties, required, name, /* additionalProperties= */ false)); + return this._addRule(ruleName, this._buildObjectRule(properties, required, name, null)); } else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) { const items = schema.items ?? schema.prefixItems; if (Array.isArray(items)) { @@ -693,12 +751,19 @@ export class SchemaConverter { const requiredProps = sortedProps.filter(k => required.has(k)); const optionalProps = sortedProps.filter(k => !required.has(k)); - if (typeof additionalProperties === 'object' || additionalProperties === true) { + if (additionalProperties !== false) { const subName = `${name ?? ''}${name ? '-' : ''}additional`; - const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`); + const valueRule = + additionalProperties != null && typeof additionalProperties === 'object' ? this.visit(additionalProperties, `${subName}-value`) + : this._addPrimitive('value', PRIMITIVE_RULES['value']); + + const key_rule = + sortedProps.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string']) + : this._addRule(`${subName}-k`, this._notStrings(sortedProps)); + propKvRuleNames['*'] = this._addRule( `${subName}-kv`, - `${this._addPrimitive('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`); + `${key_rule} ":" space ${valueRule}`); optionalProps.push('*'); } @@ -715,15 +780,11 @@ export class SchemaConverter { const [k, ...rest] = ks; const kvRuleName = propKvRuleNames[k]; let res; - if (k === '*') { - res = this._addRule( - `${name ?? ''}${name ? '-' : ''}additional-kvs`, - `${kvRuleName} ( "," space ` + kvRuleName + ` )*` - ) - } else if (firstIsOptional) { - res = `( "," space ${kvRuleName} )?`; + const commaRef = `( "," space ${kvRuleName} )`; + if (firstIsOptional) { + res = commaRef + (k === '*' ? '*' : '?'); } else { - res = kvRuleName; + res = kvRuleName + (k === '*' ? 
' ' + commaRef + '*' : ''); } if (rest.length > 0) { res += ' ' + this._addRule( diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature index d21c09135243a..b55971454afc3 100644 --- a/examples/server/tests/features/server.feature +++ b/examples/server/tests/features/server.feature @@ -82,7 +82,7 @@ Feature: llama.cpp server Examples: Prompts | response_format | n_predicted | re_content | - | {"type": "json_object", "schema": {"const": "42"}} | 5 | "42" | + | {"type": "json_object", "schema": {"const": "42"}} | 6 | "42" | | {"type": "json_object", "schema": {"items": [{"type": "integer"}]}} | 10 | \[ -300 \] | | {"type": "json_object"} | 10 | \{ " Jacky. | diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 5750d362a7247..23ef8324c1327 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -15,8 +15,6 @@ using json = nlohmann::ordered_json; -//#define INCLUDE_FAILING_TESTS 1 - static llama_grammar* build_grammar(const std::string & grammar_str) { auto parsed_grammar = grammar_parser::parse(grammar_str.c_str()); @@ -754,7 +752,7 @@ static void test_json_schema() { )""", // Passing strings { - "{}", + R"""({})""", R"""({"foo": "bar"})""", }, // Failing strings @@ -762,7 +760,7 @@ static void test_json_schema() { "", "[]", "null", - "\"\"", + R"""("")""", "true", } ); @@ -770,16 +768,14 @@ static void test_json_schema() { test_schema( "exotic formats (list)", // Schema - R"""( - { + R"""({ "items": [ { "format": "date" }, { "format": "uuid" }, { "format": "time" }, { "format": "date-time" } ] - } - )""", + })""", // Passing strings { // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it? 
@@ -798,125 +794,113 @@ static void test_json_schema() { test_schema( "string", // Schema - R"""( - { - "type": "string" - } - )""", + R"""({ + "type": "string" + })""", // Passing strings { - "\"foo\"", - "\"bar\"", - "\"\"", + R"""("foo")""", + R"""("bar")""", + R"""("")""", }, // Failing strings { - "{}", - "\"foo\": \"bar\"", + R"""({})""", + R"""("foo": "bar")""", } ); test_schema( "string w/ min length 1", // Schema - R"""( - { - "type": "string", - "minLength": 1 - } - )""", + R"""({ + "type": "string", + "minLength": 1 + })""", // Passing strings { - "\"foo\"", - "\"bar\"", + R"""("foo")""", + R"""("bar")""", }, // Failing strings { - "\"\"", - "{}", - "\"foo\": \"bar\"", + R"""("")""", + R"""({})""", + R"""("foo": "bar")""", } ); test_schema( "string w/ min length 3", // Schema - R"""( - { + R"""({ "type": "string", "minLength": 3 - } - )""", + })""", // Passing strings { - "\"foo\"", - "\"bar\"", - "\"foobar\"", + R"""("foo")""", + R"""("bar")""", + R"""("foobar")""", }, // Failing strings { - "\"\"", - "\"f\"", - "\"fo\"", + R"""("")""", + R"""("f")""", + R"""("fo")""", } ); test_schema( "string w/ max length", // Schema - R"""( - { - "type": "string", - "maxLength": 3 - } - )""", + R"""({ + "type": "string", + "maxLength": 3 + })""", // Passing strings { - "\"foo\"", - "\"bar\"", - "\"\"", - "\"f\"", - "\"fo\"", + R"""("foo")""", + R"""("bar")""", + R"""("")""", + R"""("f")""", + R"""("fo")""", }, // Failing strings { - "\"foobar\"", + R"""("foobar")""", } ); test_schema( "string w/ min & max length", // Schema - R"""( - { - "type": "string", - "minLength": 1, - "maxLength": 4 - } - )""", + R"""({ + "type": "string", + "minLength": 1, + "maxLength": 4 + })""", // Passing strings { - "\"foo\"", - "\"bar\"", - "\"f\"", - "\"barf\"", + R"""("foo")""", + R"""("bar")""", + R"""("f")""", + R"""("barf")""", }, // Failing strings { - "\"\"", - "\"barfo\"", - "\"foobar\"", + R"""("")""", + R"""("barfo")""", + R"""("foobar")""", } ); test_schema( "boolean", // Schema - R"""( - { - "type": "boolean" - } - )""", + R"""({ + "type": "boolean" + })""", // Passing strings { "true", @@ -924,122 +908,112 @@ static void test_json_schema() { }, // Failing strings { - "\"\"", - "\"true\"", - "True", - "FALSE", + R"""("")""", + R"""("true")""", + R"""(True)""", + R"""(FALSE)""", } ); test_schema( "integer", // Schema - R"""( - { - "type": "integer" - } - )""", + R"""({ + "type": "integer" + })""", // Passing strings { - "0", - "12345", - "1234567890123456" + R"""(0)""", + R"""(12345)""", + R"""(1234567890123456)""", }, // Failing strings { - "", - "01", - "007", - "12345678901234567" + R"""()""", + R"""(01)""", + R"""(007)""", + R"""(12345678901234567 )""", } ); test_schema( "string const", // Schema - R"""( - { - "const": "foo" - } - )""", + R"""({ + "const": "foo" + })""", // Passing strings { - "\"foo\"", + R"""("foo")""", }, // Failing strings { - "foo", - "\"bar\"", + R"""(foo)""", + R"""("bar")""", } ); test_schema( "non-string const", // Schema - R"""( - { - "const": true - } - )""", + R"""({ + "const": true + })""", // Passing strings { - "true", + R"""(true)""", }, // Failing strings { - "", - "foo", - "\"true\"", + R"""()""", + R"""(foo)""", + R"""("true")""", } ); test_schema( "non-string const", // Schema - R"""( - { - "enum": ["red", "amber", "green", null, 42, ["foo"]] - } - )""", + R"""({ + "enum": ["red", "amber", "green", null, 42, ["foo"]] + })""", // Passing strings { - "\"red\"", - "null", - "42", - "[\"foo\"]", + R"""("red")""", + R"""(null)""", + R"""(42)""", + 
R"""(["foo"])""", }, // Failing strings { - "", - "420", - "true", - "foo", + R"""()""", + R"""(420)""", + R"""(true)""", + R"""(foo)""", } ); test_schema( "min+max items", // Schema - R"""( - { - "items": { - "type": ["number", "integer"] - }, - "minItems": 3, - "maxItems": 5 - } - )""", + R"""({ + "items": { + "type": ["number", "integer"] + }, + "minItems": 3, + "maxItems": 5 + })""", // Passing strings { - "[1, 2, 3]", - "[1, 2, 3, 4]", - "[1, 2, 3, 4, 5]", + R"""([1, 2, 3])""", + R"""([1, 2, 3, 4])""", + R"""([1, 2, 3, 4, 5])""", }, // Failing strings { - "[1, 2]", - "[1, 2, 3, 4, 5, 6]", - "1" + R"""([1, 2])""", + R"""([1, 2, 3, 4, 5, 6])""", + R"""(1)""", } ); @@ -1047,16 +1021,14 @@ static void test_json_schema() { test_schema( "object properties", // Schema - R"""( - { + R"""({ "type": "object", "properties": { "number": { "type": "number" }, "street_name": { "type": "string" }, "street_type": { "enum": ["Street", "Avenue", "Boulevard"] } } - } - )""", + })""", // Passing strings { R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""", @@ -1066,12 +1038,8 @@ static void test_json_schema() { // "By extension, even an empty object is valid" R"""({})""", // "By default, providing additional properties is valid" -#ifdef INCLUDE_FAILING_TESTS - // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""", - // TODO: Spaces should be permitted around enum values, but currently they fail to pass. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", -#endif }, // Failing strings { @@ -1084,13 +1052,35 @@ static void test_json_schema() { } ); + test_schema( + "additional properties can't override other properties", + R"""({ + "properties": { + "a": {"type": "integer"}, + "b": {"type": "integer"} + }, + "additionalProperties": true + })""", + // Passing strings + { + R"""({"a": 42})""", + R"""({"c": ""})""", + R"""({"a": 42, "c": ""})""", + R"""({"a_": ""})""", + }, + // Failing strings + { + R"""()""", + R"""({"a": ""})""", + R"""({"a": "", "b": ""})""", + } + ); // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties) test_schema( "object properties, additionalProperties: true", // Schema - R"""( - { + R"""({ "type": "object", "properties": { "number": { "type": "number" }, @@ -1098,26 +1088,18 @@ static void test_json_schema() { "street_type": { "enum": ["Street", "Avenue", "Boulevard"] } }, "additionalProperties": true - } - )""", + })""", // Passing strings { // "By extension, even an empty object is valid" R"""({})""", -#ifdef INCLUDE_FAILING_TESTS - // TODO: Following line should pass and doesn't R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""", // "By default, leaving out properties is valid" - // TODO: Following line should pass and doesn't R"""({ "street_name": "Pennsylvania" })""", - // TODO: Following line should pass and doesn't R"""({ "number": 1600, "street_name": "Pennsylvania" })""", // "By default, providing additional properties is valid" - // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""", - // TODO: Spaces should be permitted around enum values, but currently they fail to pass. 
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", -#endif }, // Failing strings { @@ -1132,8 +1114,7 @@ static void test_json_schema() { test_schema( "required + optional props each in original order", // Schema - R"""( - { + R"""({ "type": "object", "properties": { "number": { "type": "number" }, @@ -1141,18 +1122,15 @@ static void test_json_schema() { "street_type": { "enum": ["Street", "Avenue", "Boulevard"] } }, "additionalProperties": false - } - )""", + })""", // Passing strings { R"""({ "street_name": "Pennsylvania" })""", R"""({ "number": 1600, "street_type":"Avenue"})""", R"""({ "number": 1600, "street_name": "Pennsylvania" })""", R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""", -#ifdef INCLUDE_FAILING_TESTS - // TODO: Spaces should be permitted around enum values, but currently they fail to pass. + // Spaces are permitted around enum values R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", -#endif }, // Failing strings { @@ -1166,18 +1144,16 @@ static void test_json_schema() { test_schema( "required + optional props each in original order", // Schema - R"""( - { - "properties": { - "b": {"type": "string"}, - "a": {"type": "string"}, - "d": {"type": "string"}, - "c": {"type": "string"} - }, - "required": ["a", "b"], - "additionalProperties": false - } - )""", + R"""({ + "properties": { + "b": {"type": "string"}, + "a": {"type": "string"}, + "d": {"type": "string"}, + "c": {"type": "string"} + }, + "required": ["a", "b"], + "additionalProperties": false + })""", // Passing strings { R"""({"b": "foo", "a": "bar"})""", @@ -1197,8 +1173,7 @@ static void test_json_schema() { test_schema( "required props", // Schema - R"""( - { + R"""({ "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://example.com/product.schema.json", "title": "Product", @@ -1244,8 +1219,7 @@ static void test_json_schema() { } }, "required": [ "productId", "productName", "price" ] - } - )""", + })""", // Passing strings { R"""({"productId": 1, "productName": "A green door", "price": 12.50})""", diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index 2e591bd71abaa..1e69cb6ef36be 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -473,7 +473,7 @@ static void test_all(const std::string & lang, std::function Date: Wed, 26 Jun 2024 01:46:35 +0100 Subject: [PATCH 15/15] `json`: better support for "type" unions (e.g. nullable arrays w/ typed items) (#7863) * json: better suport for "type" arrays (e.g. 
`{"type": ["array", "null"], "items": {"type": "string"}}`)

* json: add test for type: [array, null] fix

* update tests
---
 common/json-schema-to-grammar.cpp             |  4 ++-
 examples/json_schema_to_grammar.py            |  2 +-
 .../server/public/json-schema-to-grammar.mjs  |  2 +-
 tests/test-grammar-integration.cpp            | 25 +++++++++++++++
 tests/test-json-schema-to-grammar.cpp         | 32 +++++++++++++++++++
 5 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp
index b40821dadc05e..2f233e2e7477f 100644
--- a/common/json-schema-to-grammar.cpp
+++ b/common/json-schema-to-grammar.cpp
@@ -893,7 +893,9 @@ class SchemaConverter {
         } else if (schema_type.is_array()) {
             std::vector<json> schema_types;
             for (const auto & t : schema_type) {
-                schema_types.push_back({{"type", t}});
+                json schema_copy(schema);
+                schema_copy["type"] = t;
+                schema_types.push_back(schema_copy);
             }
             return _add_rule(rule_name, _generate_union_rule(name, schema_types));
         } else if (schema.contains("const")) {
diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py
index 3f3132f88a824..92f6e3d47bae7 100755
--- a/examples/json_schema_to_grammar.py
+++ b/examples/json_schema_to_grammar.py
@@ -565,7 +565,7 @@ def visit(self, schema, name):
             return self._add_rule(rule_name, self._generate_union_rule(name, schema.get('oneOf') or schema['anyOf']))

         elif isinstance(schema_type, list):
-            return self._add_rule(rule_name, self._generate_union_rule(name, [{'type': t} for t in schema_type]))
+            return self._add_rule(rule_name, self._generate_union_rule(name, [{**schema, 'type': t} for t in schema_type]))

         elif 'const' in schema:
             return self._add_rule(rule_name, self._generate_constant_rule(schema['const']) + ' space')
diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs
index 02015bbd49015..06d76edde500a 100644
--- a/examples/server/public/json-schema-to-grammar.mjs
+++ b/examples/server/public/json-schema-to-grammar.mjs
@@ -616,7 +616,7 @@ export class SchemaConverter {
     } else if (schema.oneOf || schema.anyOf) {
       return this._addRule(ruleName, this._generateUnionRule(name, schema.oneOf || schema.anyOf));
     } else if (Array.isArray(schemaType)) {
-      return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({ type: t }))));
+      return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({...schema, type: t}))));
     } else if ('const' in schema) {
       return this._addRule(ruleName, this._generateConstantRule(schema.const) + ' space');
     } else if ('enum' in schema) {
diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp
index 23ef8324c1327..0e21dc7959943 100644
--- a/tests/test-grammar-integration.cpp
+++ b/tests/test-grammar-integration.cpp
@@ -993,6 +993,31 @@ static void test_json_schema() {
         }
     );

+    test_schema(
+        "",
+        // Schema
+        R"""(
+            {
+                "type": ["array", "null"],
+                "items": { "type": "string" }
+            }
+        )""",
+        // Passing strings
+        {
+            "null",
+            "[]",
+            "[\"123\"]",
+            "[\"foo\", \"bar\"]",
+        },
+        // Failing strings
+        {
+            "",
+            "[123]",
+            "\"foo\"",
+            "[\"foo\", 42]",
+        }
+    );
+
     test_schema(
         "min+max items",
         // Schema
diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp
index 1e69cb6ef36be..3aaa11833de57 100755
--- a/tests/test-json-schema-to-grammar.cpp
+++ b/tests/test-json-schema-to-grammar.cpp
@@ -502,6 +502,38 @@ static void test_all(const std::string & lang, std::function