From 2df373ac40ea581ccca8a58c713f03ad9d4b658d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Tue, 25 Jun 2024 01:22:33 +0200 Subject: [PATCH 01/15] CUDA: fix matrix multiplication algorithm choice (#8102) --- ggml-cuda.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 2dda039242531..0acfda91d3e51 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -1924,16 +1924,16 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor } else if (!split && any_gpus_with_slow_fp16 && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && src1->ne[1] == 1) { // FP32 precision KQV single-batch for batch size 1 without FlashAttention ggml_cuda_mul_mat_vec_nc(ctx, src0, src1, dst); + } else if (!split && src0->type == GGML_TYPE_F16 && (src1->type == GGML_TYPE_F16 || !any_gpus_with_slow_fp16) + && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) { + // KQ + KQV multi-batch without FlashAttention + ggml_cuda_mul_mat_batched_cublas(ctx, src0, src1, dst); } else if (use_dequantize_mul_mat_vec) { ggml_cuda_op_mul_mat(ctx, src0, src1, dst, ggml_cuda_op_dequantize_mul_mat_vec, nullptr); } else if (use_mul_mat_vec_q) { ggml_cuda_op_mul_mat(ctx, src0, src1, dst, ggml_cuda_op_mul_mat_vec_q, quantize_row_q8_1_cuda); } else if (use_mul_mat_q) { ggml_cuda_op_mul_mat(ctx, src0, src1, dst, ggml_cuda_op_mul_mat_q, quantize_mmq_q8_1_cuda); - } else if (!split && src0->type == GGML_TYPE_F16 && (src1->type == GGML_TYPE_F16 || !any_gpus_with_slow_fp16) - && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) { - // KQ + KQV multi-batch without FlashAttention - ggml_cuda_mul_mat_batched_cublas(ctx, src0, src1, dst); } else { ggml_cuda_op_mul_mat(ctx, src0, src1, dst, ggml_cuda_op_mul_mat_cublas, nullptr); } From 083bacce14c1aaf9976aa40e8266cdc25ac749d3 Mon Sep 17 00:00:00 2001 From: "Meng, Hengyu" Date: Tue, 25 Jun 2024 10:19:20 +0800 Subject: [PATCH 02/15] [SYCL] Re-enabled mul_mat_batched_sycl (#8095) --- ggml-sycl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index e5ddf4a346c36..db045336f1edb 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -4620,7 +4620,7 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor } else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && src1->ne[1] == 1) { // KQV single-batch ggml_sycl_mul_mat_vec_nc(ctx, src0, src1, dst); - } else if (!split && src0->type == GGML_TYPE_F16 && (src1->type == GGML_TYPE_F16) && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) { + } else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) { // KQ + KQV multi-batch ggml_sycl_mul_mat_batched_sycl(ctx, src0, src1, dst); } else if (use_dequantize_mul_mat_vec) { From f702a90e245499283d6de0b287701c723cda2a87 Mon Sep 17 00:00:00 2001 From: HatsuneMikuUwU33 <173229399+HatsuneMikuUwU33@users.noreply.github.com> Date: Tue, 25 Jun 2024 10:44:48 +0200 Subject: [PATCH 03/15] Update control vector help (#8104) --- common/common.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 1dc53265134a7..0ca7b4430f765 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1538,9 +1538,11 @@ void 
gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param options.push_back({ "*", " --lora FNAME", "apply LoRA adapter (implies --no-mmap)" }); options.push_back({ "*", " --lora-scaled FNAME S", "apply LoRA adapter with user defined scaling S (implies --no-mmap)" }); options.push_back({ "*", " --lora-base FNAME", "optional model to use as a base for the layers modified by the LoRA adapter" }); - options.push_back({ "*", " --control-vector FNAME", "add a control vector" }); + options.push_back({ "*", " --control-vector FNAME", "add a control vector\n" + "note: this argument can be repeated to add multiple control vectors" }); options.push_back({ "*", " --control-vector-scaled FNAME SCALE", - "add a control vector with user defined scaling SCALE" }); + "add a control vector with user defined scaling SCALE\n" + "note: this argument can be repeated to add multiple scaled control vectors" }); options.push_back({ "*", " --control-vector-layer-range START END", "layer range to apply the control vector(s) to, start and end inclusive" }); options.push_back({ "*", "-m, --model FNAME", "model path (default: models/$filename with filename from --hf-file\n" From 3791ad219323389106dc3fd80814eb5bbb7b80de Mon Sep 17 00:00:00 2001 From: HanishKVC Date: Tue, 25 Jun 2024 16:57:35 +0530 Subject: [PATCH 04/15] SimpleChat v3.1: Boolean chat request options in Settings UI, cache_prompt (#7950) * SimpleChat: Allow for chat req bool options to be user controlled * SimpleChat: Allow user to control cache_prompt flag in request * SimpleChat: Add sample GUI images to readme file Show the chat screen and the settings screen * SimpleChat:Readme: Add quickstart block, title to image, cleanup * SimpleChat: RePosition contents of the Info and Settings UI Make it more logically structured and flow through. * SimpleChat: Rename to apiRequestOptions from chatRequestOptions So that it is not wrongly assumed that these request options are used only for chat/completions endpoint. Rather these are used for both the end points, so rename to match semantic better. * SimpleChat: Update image included with readme wrt settings ui * SimpleChat:ReadMe: Switch to webp screen image to reduce size --- examples/server/public_simplechat/readme.md | 37 +++++--- .../server/public_simplechat/simplechat.js | 79 ++++++++++-------- .../public_simplechat/simplechat_screens.webp | Bin 0 -> 21376 bytes 3 files changed, 68 insertions(+), 48 deletions(-) create mode 100644 examples/server/public_simplechat/simplechat_screens.webp diff --git a/examples/server/public_simplechat/readme.md b/examples/server/public_simplechat/readme.md index 2dc1778255256..21410199f6016 100644 --- a/examples/server/public_simplechat/readme.md +++ b/examples/server/public_simplechat/readme.md @@ -3,6 +3,13 @@ by Humans for All. +## quickstart + +To run from the build dir + +bin/llama-server -m path/model.gguf --path ../examples/server/public_simplechat + +Continue reading for the details. ## overview @@ -14,6 +21,8 @@ own system prompts. This allows seeing the generated text / ai-model response in oneshot at the end, after it is fully generated, or potentially as it is being generated, in a streamed manner from the server/ai-model. +![Chat and Settings screens](./simplechat_screens.webp "Chat and Settings screens") + Auto saves the chat session locally as and when the chat is progressing and inturn at a later time when you open SimpleChat, option is provided to restore the old chat session, if a matching one exists. 
@@ -170,17 +179,23 @@ It is attached to the document object. Some of these can also be updated using t The histogram/freq based trimming logic is currently tuned for english language wrt its is-it-a-alpabetic|numeral-char regex match logic. - chatRequestOptions - maintains the list of options/fields to send along with chat request, + apiRequestOptions - maintains the list of options/fields to send along with api request, irrespective of whether /chat/completions or /completions endpoint. If you want to add additional options/fields to send to the server/ai-model, and or modify the existing options value or remove them, for now you can update this global var using browser's development-tools/console. - For string and numeric fields in chatRequestOptions, including even those added by a user - at runtime by directly modifying gMe.chatRequestOptions, setting ui entries will be auto + For string, numeric and boolean fields in apiRequestOptions, including even those added by a + user at runtime by directly modifying gMe.apiRequestOptions, setting ui entries will be auto created. + cache_prompt option supported by example/server is allowed to be controlled by user, so that + any caching supported wrt system-prompt and chat history, if usable can get used. When chat + history sliding window is enabled, cache_prompt logic may or may not kick in at the backend + wrt same, based on aspects related to model, positional encoding, attention mechanism etal. + However system prompt should ideally get the benefit of caching. + headers - maintains the list of http headers sent when request is made to the server. By default Content-Type is set to application/json. Additionally Authorization entry is provided, which can be set if needed using the settings ui. @@ -197,10 +212,10 @@ It is attached to the document object. Some of these can also be updated using t >0 : Send the latest chat history from the latest system prompt, limited to specified cnt. -By using gMe's iRecentUserMsgCnt and chatRequestOptions.max_tokens one can try to control the -implications of loading of the ai-model's context window by chat history, wrt chat response to -some extent in a simple crude way. You may also want to control the context size enabled when -the server loads ai-model, on the server end. +By using gMe's iRecentUserMsgCnt and apiRequestOptions.max_tokens/n_predict one can try to control +the implications of loading of the ai-model's context window by chat history, wrt chat response to +some extent in a simple crude way. You may also want to control the context size enabled when the +server loads ai-model, on the server end. Sometimes the browser may be stuborn with caching of the file, so your updates to html/css/js @@ -237,12 +252,12 @@ also be started with a model context size of 1k or more, to be on safe side. internal n_predict, for now add the same here on the client side, maybe later add max_tokens to /completions endpoint handling code on server side. -NOTE: One may want to experiment with frequency/presence penalty fields in chatRequestOptions -wrt the set of fields sent to server along with the user query. To check how the model behaves +NOTE: One may want to experiment with frequency/presence penalty fields in apiRequestOptions +wrt the set of fields sent to server along with the user query, to check how the model behaves wrt repeatations in general in the generated text response. A end-user can change these behaviour by editing gMe from browser's devel-tool/console or by -using the providing settings ui. 
+using the provided settings ui (for settings exposed through the ui). ### OpenAi / Equivalent API WebService @@ -253,7 +268,7 @@ for a minimal chatting experimentation by setting the below. * the baseUrl in settings ui * https://api.openai.com/v1 or similar -* Wrt request body - gMe.chatRequestOptions +* Wrt request body - gMe.apiRequestOptions * model (settings ui) * any additional fields if required in future diff --git a/examples/server/public_simplechat/simplechat.js b/examples/server/public_simplechat/simplechat.js index 25afb25649139..8e0df3b61df2b 100644 --- a/examples/server/public_simplechat/simplechat.js +++ b/examples/server/public_simplechat/simplechat.js @@ -222,8 +222,8 @@ class SimpleChat { * @param {Object} obj */ request_jsonstr_extend(obj) { - for(let k in gMe.chatRequestOptions) { - obj[k] = gMe.chatRequestOptions[k]; + for(let k in gMe.apiRequestOptions) { + obj[k] = gMe.apiRequestOptions[k]; } if (gMe.bStream) { obj["stream"] = true; @@ -740,11 +740,12 @@ class Me { "Authorization": "", // Authorization: Bearer OPENAI_API_KEY } // Add needed fields wrt json object to be sent wrt LLM web services completions endpoint. - this.chatRequestOptions = { + this.apiRequestOptions = { "model": "gpt-3.5-turbo", "temperature": 0.7, "max_tokens": 1024, "n_predict": 1024, + "cache_prompt": false, //"frequency_penalty": 1.2, //"presence_penalty": 1.2, }; @@ -800,51 +801,55 @@ class Me { ui.el_create_append_p(`bStream:${this.bStream}`, elDiv); - ui.el_create_append_p(`bCompletionFreshChatAlways:${this.bCompletionFreshChatAlways}`, elDiv); - - ui.el_create_append_p(`bCompletionInsertStandardRolePrefix:${this.bCompletionInsertStandardRolePrefix}`, elDiv); - ui.el_create_append_p(`bTrimGarbage:${this.bTrimGarbage}`, elDiv); + ui.el_create_append_p(`ApiEndPoint:${this.apiEP}`, elDiv); + ui.el_create_append_p(`iRecentUserMsgCnt:${this.iRecentUserMsgCnt}`, elDiv); - ui.el_create_append_p(`ApiEndPoint:${this.apiEP}`, elDiv); + ui.el_create_append_p(`bCompletionFreshChatAlways:${this.bCompletionFreshChatAlways}`, elDiv); + + ui.el_create_append_p(`bCompletionInsertStandardRolePrefix:${this.bCompletionInsertStandardRolePrefix}`, elDiv); } - ui.el_create_append_p(`chatRequestOptions:${JSON.stringify(this.chatRequestOptions, null, " - ")}`, elDiv); + ui.el_create_append_p(`apiRequestOptions:${JSON.stringify(this.apiRequestOptions, null, " - ")}`, elDiv); ui.el_create_append_p(`headers:${JSON.stringify(this.headers, null, " - ")}`, elDiv); } /** - * Auto create ui input elements for fields in ChatRequestOptions + * Auto create ui input elements for fields in apiRequestOptions * Currently supports text and number field types. 
* @param {HTMLDivElement} elDiv */ - show_settings_chatrequestoptions(elDiv) { + show_settings_apirequestoptions(elDiv) { let typeDict = { "string": "text", "number": "number", }; let fs = document.createElement("fieldset"); let legend = document.createElement("legend"); - legend.innerText = "ChatRequestOptions"; + legend.innerText = "ApiRequestOptions"; fs.appendChild(legend); elDiv.appendChild(fs); - for(const k in this.chatRequestOptions) { - let val = this.chatRequestOptions[k]; + for(const k in this.apiRequestOptions) { + let val = this.apiRequestOptions[k]; let type = typeof(val); - if (!((type == "string") || (type == "number"))) { - continue; + if (((type == "string") || (type == "number"))) { + let inp = ui.el_creatediv_input(`Set${k}`, k, typeDict[type], this.apiRequestOptions[k], (val)=>{ + if (type == "number") { + val = Number(val); + } + this.apiRequestOptions[k] = val; + }); + fs.appendChild(inp.div); + } else if (type == "boolean") { + let bbtn = ui.el_creatediv_boolbutton(`Set{k}`, k, {true: "true", false: "false"}, val, (userVal)=>{ + this.apiRequestOptions[k] = userVal; + }); + fs.appendChild(bbtn.div); } - let inp = ui.el_creatediv_input(`Set${k}`, k, typeDict[type], this.chatRequestOptions[k], (val)=>{ - if (type == "number") { - val = Number(val); - } - this.chatRequestOptions[k] = val; - }); - fs.appendChild(inp.div); } } @@ -870,32 +875,32 @@ class Me { }); elDiv.appendChild(bb.div); - bb = ui.el_creatediv_boolbutton("SetCompletionFreshChatAlways", "CompletionFreshChatAlways", {true: "[+] yes fresh", false: "[-] no, with history"}, this.bCompletionFreshChatAlways, (val)=>{ - this.bCompletionFreshChatAlways = val; + bb = ui.el_creatediv_boolbutton("SetTrimGarbage", "TrimGarbage", {true: "[+] yes trim", false: "[-] dont trim"}, this.bTrimGarbage, (val)=>{ + this.bTrimGarbage = val; }); elDiv.appendChild(bb.div); - bb = ui.el_creatediv_boolbutton("SetCompletionInsertStandardRolePrefix", "CompletionInsertStandardRolePrefix", {true: "[+] yes insert", false: "[-] dont insert"}, this.bCompletionInsertStandardRolePrefix, (val)=>{ - this.bCompletionInsertStandardRolePrefix = val; - }); - elDiv.appendChild(bb.div); + this.show_settings_apirequestoptions(elDiv); - bb = ui.el_creatediv_boolbutton("SetTrimGarbage", "TrimGarbage", {true: "[+] yes trim", false: "[-] dont trim"}, this.bTrimGarbage, (val)=>{ - this.bTrimGarbage = val; + let sel = ui.el_creatediv_select("SetApiEP", "ApiEndPoint", ApiEP.Type, this.apiEP, (val)=>{ + this.apiEP = ApiEP.Type[val]; }); - elDiv.appendChild(bb.div); + elDiv.appendChild(sel.div); - let sel = ui.el_creatediv_select("SetChatHistoryInCtxt", "ChatHistoryInCtxt", this.sRecentUserMsgCnt, this.iRecentUserMsgCnt, (val)=>{ + sel = ui.el_creatediv_select("SetChatHistoryInCtxt", "ChatHistoryInCtxt", this.sRecentUserMsgCnt, this.iRecentUserMsgCnt, (val)=>{ this.iRecentUserMsgCnt = this.sRecentUserMsgCnt[val]; }); elDiv.appendChild(sel.div); - sel = ui.el_creatediv_select("SetApiEP", "ApiEndPoint", ApiEP.Type, this.apiEP, (val)=>{ - this.apiEP = ApiEP.Type[val]; + bb = ui.el_creatediv_boolbutton("SetCompletionFreshChatAlways", "CompletionFreshChatAlways", {true: "[+] yes fresh", false: "[-] no, with history"}, this.bCompletionFreshChatAlways, (val)=>{ + this.bCompletionFreshChatAlways = val; }); - elDiv.appendChild(sel.div); + elDiv.appendChild(bb.div); - this.show_settings_chatrequestoptions(elDiv); + bb = ui.el_creatediv_boolbutton("SetCompletionInsertStandardRolePrefix", "CompletionInsertStandardRolePrefix", {true: "[+] yes insert", false: "[-] dont 
insert"}, this.bCompletionInsertStandardRolePrefix, (val)=>{ + this.bCompletionInsertStandardRolePrefix = val; + }); + elDiv.appendChild(bb.div); } diff --git a/examples/server/public_simplechat/simplechat_screens.webp b/examples/server/public_simplechat/simplechat_screens.webp new file mode 100644 index 0000000000000000000000000000000000000000..ccea44396051686b97220b0f5b6b9beb63706114 GIT binary patch literal 21376 zcmd?PW3wZp9Y2a^I$( z`ybnv!*kR3*>B&I-jxrnZ_KaKpWDZuzusBi*59*V?CbL1-(S`~`R#x3-3Rs8&iD6s z7VoWDagpp<-8MBF*nLj*{|y1v<5(wIC}OAKR>4IbvQjH1y}Hh*X9BwRy)()L8~%Dp zF{O>buw4#_CQXDck%u>S&Nft^j(|meh;+5ovb+6kc`qiYDOW`Gt+SQz((DUwLzB`C zV~fG)TXIeQSAJC+@vXB7S!e1wD2;3mw$T&bcKea~v5Lu$D)2RgF=08th|IRLU+0ww zz@VVS?-6aoBBrmb9CQQtZq*MZx0fC`<_=0ns02Ev7s7$LtEZq@_E6O|nWW_8ZJSrs zg3{9$c^z|!VfloLU{edq*-d{eUvYII6fsz0VDVYbm@&`ti5kC4y zR{UNnpk_UTISkI|NGOh%q5?A8s%;6FWQ6=7TDl*5Tz~p>h=rTNE%3l-k<6W*?Tfkzi>mzheD$E8>NExS+Vnk*wn9pxb$Oz zHBi)bO;el*Qd4cVbOkZFZ868j{Y&2}nx1`Ot}qJyH?o%+&7UK=X-}9wcd|~;XMCPc zPb~yTzyL3yvXi2Fb9&PzZ#^7yOW8jcbF<4q(J;Up?ka1yST&WybH)~A7e*0YRD^TY zJ-zZFS0+$p9>>2*CMJe0z$x*Hk>Njeaeb?`1X8081kle}z9EIlrfAdvLoZ|`R_eXfw0jdbD%W7OQCJIQfG9Xg}Auy@8weBDoXp#R`%p?mniFLf$EgXG{bkiG&)Xp;5v_xh zbx0I+M_}H^E?|b1u!D;U7UzwOK0O6Gm`R`6T6hv6cTdPu$sh)m;OiJgEw<5#jq>N!?uL;e?(lY2xoP zsVNcfeDwkiE7dYuW{(D+1v@uQ53@yeKFW{lU~qGMM***sAdLf!sj0X1v2dY<2-9eF zxZ-Y>=x>X!VayQEV7U+)gxp&`maiBw$&?ufOS=I;(#zDT0?o0ywe+o65>aiN08-xx zd3NSF_{W*DYcdL*gJ^5oJI@SKdH`U9JTmOq!QIi>ztsO3pESo)R+`azkAh;))tdGlu(q znD()4IS%y77Y5~V)+e@x8l!iCCVs|&@rD!8qCauC+PY*TL0GC!ue;V(uneAa2V%MM zKrB*s{ITvtR#sAX7YRsn5e~s~o%#L9PNO-%tOdjZ$utIWq1907GHhucfg^smXWvjovbh#=&j0&YJ<_k079oD$ZL@LOR|W2o*HXbEGVs* z4J+2jDI{iJIn4>r1ScW0%$*1xv#W1r=N|s0DUX`5E#gQ%WFo-;{42Nv%P#K-D@j+| zsK;&N>#Ne<@CLuJ$Cudl|p!9j9Me{H{bfG zCNEPdsg<$0Nl)mvf*&UCunN2{0!=%<%eBLxv6Ygx^&T%XL_O?}=Lb_xNzz<*aQ3*? 
zTPvVmmyOuS1Q;hlQc&kudmHgtLh;=fFfk#3w|LR;(e)#FuT8EGtWjk8 zivIYUTSdb*3WMo4*n!GQo~uXFdw>#2xg_vY$OXgKS5Y0_&uYN7(5ng3w(tMBBOho1 z&YBXX{K6Z10cFJ^?LJHOHpox6Ir+=+)MApE7cgtidF{OBa^fTK?f_iqMNeQC@q!xw zRY3sHNb`N!JwZH3w~(C^AYpSGGtIsxN^BWV0H7undGL)uIaD|2YrDzVsN=daCpXJlwTZTX zNWZz|{eiU3z0KAV)q3E8U}HvwD^LDW90#DccJ$Wo zn!c)k%vR7G;NdTjUI+Eq8WXKj>I|eo@qXZECoI$xa*qp&-#}p9lI-k!fa+?(%>`8!E!jhBrJS*-xGdyWF#Z2i@;HRzV~1D)ajmN zfnYXDD-uW2Y}ox^bkl5mir!2o{QK-e9}#H~>~gEkw1-q=c{qdvgfWj|_y@WEFOOIH&+oVYt(yzej?bjMoC+ec!5HHrw9RFljd!ck zGoD`s&vi7I1d$zT7j2=g(Df~D+||A)Bprqhsk{CySsNCfnjFaUJ??k?4s$f*6n0}K z@~0mRZ*6fULGnuo@qv# zmE_i~5mG(udTF{0u>8fYDeb%Iy zUS0}EfsSXrm=F)0MdAn3Ki^s4bRMhBUk);i5zt^rF{g61+E^xz^1|*)`v#zU^G&Q& zd$<*4AZ8dE7^9?V5fVetwVe(Q=D7KlamB8Z3P%AS%#+@Br;u|QXAubslFW`RgyBlV zv~=_A?slkg1c&0jO;jeH%}fXkKWou0fRQy+402t40+Xgvmj0boA99DTnhlqf%i<5k z+LWLYq^VO3o}~<bT1wWU&y$=cCGe%hc5upW!`uRoplZ0C zC?+G-e#NBUJ;y&Az3PZ_tE_@CCrnvayo>Cl`Zl#qVa2Uru9;DWao9i9=(LoBg$(xQ zso&_>i&gFC)G2lkf=r)2yk-@nf?moW30ut5Pw|mSIQZ_~m13T8F_>~VR!RbTDgJ*tp$P9gS9?)0Y<83P$*ziXTZqnR*I!L&-UHeW~#WV)I z7y+GyTF;78X^zEDEjwBK6}0q!e*R_A74^tgOE*9&xRGtBbWoFSM;8~$+s7v=?}ePHE48=Jct5Bm(& ze~s6v@;8G1bdQ~ASQ2s2(P;PP3{V9|!o$ z2zPU^>0LNoO=TCBe<{`HMMh)37Pwa~Y-I|l0Pz8|OzO6QdktRWY{I!kr0}*7T_NU} znZQOVKd>V_zyUd2`OQL+xRb&%Js{NbX<`ihHtma7M-OFsCtdXe*OQfsW-LLPIB_0v zhv@;9Or0jGK|pl*4YFfSQSe9f`EG5!b`@i&S8rzQd5nT&CV%+K^6xi>lDOjS8Dl^R zQwD=S?0jl#s!P44QSe9f`EF^Fwx&rcrk8;@q@K>OQBr3(Jz?uM<@UQX)SZvv^Gjr+ z!pm;Q2_|}uR@F`{TiWM_wQnGiQ)+hxr0plMv-zarS`!%0s3AlfTo|(5MZ~U2oAU|m z7yN+aFX4XrKff}t5w7$R_5ms$WU>~d9%S9%A0Qoe3bL)NA1RX^TU2m$jVJS96JGtZ z9|9SR4!2bnv%~^mSaAMEIi{S4h9nTY3!7ZmqL{66cpOLLml6E!wL%jZO5_E)?~?#-F`jAyfRSE4+|{`{I-xS4)m!%{g(d3- zk9?YQz()YHedI8vOQ`4dNdNYj5ZFf(@PbmWY69$`e^}er8#%*6_k8RzG}*i`S+?3I znW9l|EG7+9f0Um_vQm3D=Tq0 z;yz$;K3v+7g(IKnwevSE4w&_sVXAm2yTRaJUh&w{r=cGz|CampKAKgAOi$C#32J(uqBXtXmUlNU@>6;y;IcE@57@ zN6-9=TS{v_;bt_QUD?Ca zRkJ|SeX`#=D4LpbdsWC=A!l4n+K4(FGDI2Wtm!l&XhUEvOc07ouj`}qhuk<1Vbnja zcAv+-LoY5j;ksY^OFDIy6oT2;l*cr$dr6*yW^5!hkO}-2U{UGYH;tRa0ks%l3oSni zgj8key5+t* zzQ_JZyNMQFpMD&hjF8YadFCv0dP;KJZ%z*|l`GEYgWZlmxRVQ7A%k^!vadm1(dr>V zVHoLEiFjZThHKws`_qmpF@R+DQAwt;q*S_JR9bJvkMCW3*wS~#i3P;~CiMADayMSC z`^U4LVhXH6?2P`uARg8Ao?Z?k%r&(MAU(82+ZPylJZ~8`xvu^!&icB>hvYt#vxBKb zov!A}BGso_fCuUwLt>e|b4oIim>E;*aLh__^bc1e4y$b`Ate^p*CiUsMl`N98}1+N z%8A)NHn-dZR-?ccL4aB0=|qdRh9YpLlhYf>)F?Yb&kQ=wjgisb-7S4bNr2EIUyLWp zC0-f40OxVz$K=3lzCR8rIShGK9pq~t#77Y~3wkwYrVBPuFvMYOzLR<{9=~kz=_-*2 zte%C)iAF~3_rMDokFk<(B7o7>335RWKqN0$2F<6Cj_czvZI5!sto?TBLsJWZ_JYOb zzw)qd4T7MVLUi+N0lgobrfD4|`~OU<{So;TG-?owke<;2QYh7(buiSZY>%nJUeGiS z9sgN4r$_TJvXHvkv6Z~q;yOm#untqMOqn7pZ{Hl;6#Du6f6VfakCQP?je-C86}T%b zD2pXB2eC=0ICU=FP&w#(r~~WQ4RCxaI|7b2ncGr(8Lshe@HV9&NQmIu=0miGDfEw) z!{UyAJZ?JwHvy>B_Cx}xN(U8#+)of`nkG*g=0;Dkb0ZPFjz}z@39qN&HnYoZEGerV z*8PX2AW9Fw?|Hd@2~GFW#~n0GmliggPU}YNdz8QQkiwao)1uW9p`Vqe{{8f}tc=cX z)PaF``_xs!2st6%_o4gjl=wL}mRuniREBsZ%(yO-)%k)3Tym!z6juc`6` zfv+O}C^-;-b6=ih6{-SkljO%c9+Y?n`6z~VI%gP}mS?aX|E5YX9gA(TaLGvIpUF%3 F008Q#p-=z- literal 0 HcmV?d00001 From 48e6b92cc378c937e59719f2c0f482bf76c9ca81 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Tue, 25 Jun 2024 13:56:49 +0200 Subject: [PATCH 05/15] Add chat template support for llama-cli (#8068) * add chat template support for llama-cli * add help message * server: simplify format_chat * more consistent naming * improve * add llama_chat_format_example * fix server * code style * code style * Update examples/main/main.cpp Co-authored-by: Georgi Gerganov --------- Co-authored-by: Georgi Gerganov --- common/common.cpp | 60 +++++++++++++++++++++++++++++++++++- 
common/common.h | 23 ++++++++++++++ examples/main/main.cpp | 55 +++++++++++++++++++++++++-------- examples/server/server.cpp | 12 ++------ examples/server/utils.hpp | 29 +++-------------- llama.cpp | 4 +-- tests/test-chat-template.cpp | 20 ++++++++++++ 7 files changed, 154 insertions(+), 49 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 0ca7b4430f765..da6db4dc6a09c 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1444,7 +1444,10 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param options.push_back({ "main", " --cfg-negative-prompt-file FNAME", "negative prompt file to use for guidance" }); options.push_back({ "main", " --cfg-scale N", "strength of guidance (default: %.1f, 1.0 = disable)", (double)sparams.cfg_scale }); - + options.push_back({ "main", " --chat-template JINJA_TEMPLATE", + "set custom jinja chat template (default: template taken from model's metadata)\n" + "only commonly used templates are accepted:\n" + "https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template" }); options.push_back({ "grammar" }); options.push_back({ "*", " --grammar GRAMMAR", "BNF-like grammar to constrain generations (see samples in grammars/ dir) (default: '%s')", sparams.grammar.c_str() }); options.push_back({ "*", " --grammar-file FNAME", "file to read grammar from" }); @@ -2604,12 +2607,67 @@ bool llama_should_add_bos_token(const llama_model * model) { return add_bos != -1 ? bool(add_bos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM); } +// +// Chat template utils +// + bool llama_chat_verify_template(const std::string & tmpl) { llama_chat_message chat[] = {{"user", "test"}}; int res = llama_chat_apply_template(nullptr, tmpl.c_str(), chat, 1, true, nullptr, 0); return res >= 0; } +std::string llama_chat_apply_template(const struct llama_model * model, + const std::string & tmpl, + const std::vector & msgs, + bool add_ass) { + int alloc_size = 0; + std::vector chat; + for (auto & msg : msgs) { + chat.push_back({msg.role.c_str(), msg.content.c_str()}); + alloc_size += (msg.role.size() + msg.content.size()) * 1.25; + } + + const char * ptr_tmpl = tmpl.empty() ? 
nullptr : tmpl.c_str(); + std::vector buf(alloc_size); + + // run the first time to get the total output length + int32_t res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), add_ass, buf.data(), buf.size()); + + // if it turns out that our buffer is too small, we resize it + if ((size_t) res > buf.size()) { + buf.resize(res); + res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), add_ass, buf.data(), buf.size()); + } + + std::string formatted_chat(buf.data(), res); + return formatted_chat; +} + +std::string llama_chat_format_single(const struct llama_model * model, + const std::string & tmpl, + const std::vector & past_msg, + const llama_chat_msg & new_msg, + bool add_ass) { + auto fmt_past_msg = llama_chat_apply_template(model, tmpl, past_msg, false); + std::vector chat_new(past_msg); + chat_new.push_back(new_msg); + auto fmt_new_msg = llama_chat_apply_template(model, tmpl, chat_new, add_ass); + auto formatted = fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size()); + return formatted; +} + +std::string llama_chat_format_example(const struct llama_model * model, + const std::string & tmpl) { + std::vector msgs = { + {"system", "You are a helpful assistant"}, + {"user", "Hello"}, + {"assistant", "Hi there"}, + {"user", "How are you?"}, + }; + return llama_chat_apply_template(model, tmpl, msgs, true); +} + // // KV cache utils // diff --git a/common/common.h b/common/common.h index a5c738f8b643f..de90eec5113f7 100644 --- a/common/common.h +++ b/common/common.h @@ -365,9 +365,32 @@ bool llama_should_add_bos_token(const llama_model * model); // Chat template utils // +// same with llama_chat_message, but uses std::string +struct llama_chat_msg { + std::string role; + std::string content; +}; + // Check if the template supplied via "--chat-template" is supported or not. 
Returns true if it's valid bool llama_chat_verify_template(const std::string & tmpl); +// CPP wrapper for llama_chat_apply_template +std::string llama_chat_apply_template(const struct llama_model * model, + const std::string & tmpl, + const std::vector & chat, + bool add_ass); + +// Format single message, while taking into account the position of that message in chat history +std::string llama_chat_format_single(const struct llama_model * model, + const std::string & tmpl, + const std::vector & past_msg, + const llama_chat_msg & new_msg, + bool add_ass); + +// Returns an example of formatted chat +std::string llama_chat_format_example(const struct llama_model * model, + const std::string & tmpl); + // // KV cache utils // diff --git a/examples/main/main.cpp b/examples/main/main.cpp index b97b7b7937f02..cfaf6a6e8ba4a 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -39,12 +39,12 @@ static std::ostringstream * g_output_ss; static std::vector * g_output_tokens; static bool is_interacting = false; -static bool file_exists(const std::string &path) { +static bool file_exists(const std::string & path) { std::ifstream f(path.c_str()); return f.good(); } -static bool file_is_empty(const std::string &path) { +static bool file_is_empty(const std::string & path) { std::ifstream f; f.exceptions(std::ifstream::failbit | std::ifstream::badbit); f.open(path.c_str(), std::ios::in | std::ios::binary | std::ios::ate); @@ -117,6 +117,14 @@ static void llama_log_callback_logTee(ggml_log_level level, const char * text, v LOG_TEE("%s", text); } +static std::string chat_add_and_format(struct llama_model * model, std::vector & chat_msgs, std::string role, std::string content) { + llama_chat_msg new_msg{role, content}; + auto formatted = llama_chat_format_single( + model, g_params->chat_template, chat_msgs, new_msg, role == "user"); + chat_msgs.push_back({role, content}); + return formatted; +} + int main(int argc, char ** argv) { gpt_params params; g_params = ¶ms; @@ -190,6 +198,7 @@ int main(int argc, char ** argv) { llama_model * model; llama_context * ctx; llama_context * ctx_guidance = NULL; + std::vector chat_msgs; g_model = &model; g_ctx = &ctx; @@ -215,6 +224,8 @@ int main(int argc, char ** argv) { __func__, n_ctx_train, n_ctx); } + LOG_TEE("%s: chat template example: %s\n", __func__, llama_chat_format_example(model, params.chat_template).c_str()); + // print system information { LOG_TEE("\n"); @@ -249,16 +260,21 @@ int main(int argc, char ** argv) { std::vector embd_inp; - if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) { - LOG("tokenize the prompt\n"); - embd_inp = ::llama_tokenize(ctx, params.prompt, true, true); - } else { - LOG("use session tokens\n"); - embd_inp = session_tokens; - } + { + auto prompt = params.conversation + ? 
chat_add_and_format(model, chat_msgs, "system", params.prompt) // format the system prompt in conversation mode + : params.prompt; + if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) { + LOG("tokenize the prompt\n"); + embd_inp = ::llama_tokenize(ctx, prompt, true, true); + } else { + LOG("use session tokens\n"); + embd_inp = session_tokens; + } - LOG("prompt: \"%s\"\n", log_tostr(params.prompt)); - LOG("tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, embd_inp).c_str()); + LOG("prompt: \"%s\"\n", log_tostr(prompt)); + LOG("tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, embd_inp).c_str()); + } // Should not run without any tokens if (embd_inp.empty()) { @@ -478,6 +494,7 @@ int main(int argc, char ** argv) { std::vector input_tokens; g_input_tokens = &input_tokens; std::vector output_tokens; g_output_tokens = &output_tokens; std::ostringstream output_ss; g_output_ss = &output_ss; + std::ostringstream assistant_ss; // for storing current assistant message, used in conversation mode // the first thing we will do is to output the prompt, so set color accordingly console::set_display(console::prompt); @@ -793,11 +810,18 @@ int main(int argc, char ** argv) { is_antiprompt = true; } + chat_add_and_format(model, chat_msgs, "system", assistant_ss.str()); is_interacting = true; printf("\n"); } } + // if current token is not EOG, we add it to current assistant message + if (params.conversation) { + auto id = llama_sampling_last(ctx_sampling); + assistant_ss << llama_token_to_piece(ctx, id, false); + } + if (n_past > 0 && is_interacting) { LOG("waiting for user input\n"); @@ -848,8 +872,12 @@ int main(int argc, char ** argv) { string_process_escapes(buffer); } + std::string user_inp = params.conversation + ? chat_add_and_format(model, chat_msgs, "user", std::move(buffer)) + : std::move(buffer); + // TODO: one inconvenient of current chat template implementation is that we can't distinguish between user input and special tokens (prefix/postfix) const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true); - const auto line_inp = ::llama_tokenize(ctx, buffer, false, false); + const auto line_inp = ::llama_tokenize(ctx, user_inp, false, params.conversation); const auto line_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true); LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str()); @@ -864,6 +892,9 @@ int main(int argc, char ** argv) { output_ss << llama_token_to_piece(ctx, token); } + // reset assistant message + assistant_ss.str(""); + n_remain -= line_inp.size(); LOG("n_remain: %d\n", n_remain); } else { diff --git a/examples/server/server.cpp b/examples/server/server.cpp index f9a86961f9c8e..ae768097baa0e 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2606,17 +2606,9 @@ int main(int argc, char ** argv) { // print sample chat example to make it clear which template is used { - json chat; - chat.push_back({{"role", "system"}, {"content", "You are a helpful assistant"}}); - chat.push_back({{"role", "user"}, {"content", "Hello"}}); - chat.push_back({{"role", "assistant"}, {"content", "Hi there"}}); - chat.push_back({{"role", "user"}, {"content", "How are you?"}}); - - const std::string chat_example = format_chat(ctx_server.model, params.chat_template, chat); - LOG_INFO("chat template", { - {"chat_example", chat_example}, - {"built_in", params.chat_template.empty()}, + {"chat_example", llama_chat_format_example(ctx_server.model, params.chat_template)}, + {"built_in", params.chat_template.empty()}, }); } diff 
--git a/examples/server/utils.hpp b/examples/server/utils.hpp index 63fde9c9faabe..7ef2a519a10c7 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -118,36 +118,17 @@ static inline void server_log(const char * level, const char * function, int lin // Format given chat. If tmpl is empty, we take the template from model metadata inline std::string format_chat(const struct llama_model * model, const std::string & tmpl, const std::vector & messages) { - size_t alloc_size = 0; - // vector holding all allocated string to be passed to llama_chat_apply_template - std::vector str(messages.size() * 2); - std::vector chat(messages.size()); + std::vector chat; for (size_t i = 0; i < messages.size(); ++i) { const auto & curr_msg = messages[i]; - str[i*2 + 0] = json_value(curr_msg, "role", std::string("")); - str[i*2 + 1] = json_value(curr_msg, "content", std::string("")); - alloc_size += str[i*2 + 1].length(); - chat[i].role = str[i*2 + 0].c_str(); - chat[i].content = str[i*2 + 1].c_str(); + std::string role = json_value(curr_msg, "role", std::string("")); + std::string content = json_value(curr_msg, "content", std::string("")); + chat.push_back({role, content}); } - const char * ptr_tmpl = tmpl.empty() ? nullptr : tmpl.c_str(); - std::vector buf(alloc_size * 2); - - // run the first time to get the total output length - int32_t res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size()); - - // if it turns out that our buffer is too small, we resize it - if ((size_t) res > buf.size()) { - buf.resize(res); - res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size()); - } - - const std::string formatted_chat(buf.data(), res); - + auto formatted_chat = llama_chat_apply_template(model, tmpl, chat, true); LOG_VERBOSE("formatted_chat", {{"text", formatted_chat.c_str()}}); - return formatted_chat; } diff --git a/llama.cpp b/llama.cpp index 49bc93c028a2a..33e6cb7229aab 100644 --- a/llama.cpp +++ b/llama.cpp @@ -18818,10 +18818,10 @@ static int32_t llama_chat_apply_template_internal( if (add_ass) { ss << "<|im_start|>assistant\n"; } - } else if (tmpl == "llama2" || tmpl.find("[INST]") != std::string::npos) { + } else if (tmpl == "llama2" || tmpl == "mistral" || tmpl.find("[INST]") != std::string::npos) { // llama2 template and its variants // [variant] support system message - bool support_system_message = tmpl.find("<>") != std::string::npos; + bool support_system_message = tmpl.find("<>") != std::string::npos || tmpl == "mistral"; // [variant] space before + after response bool space_around_response = tmpl.find("' ' + eos_token") != std::string::npos; // [variant] add BOS inside history diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp index cef9a650bdfdf..d19ba8633e8c2 100644 --- a/tests/test-chat-template.cpp +++ b/tests/test-chat-template.cpp @@ -7,6 +7,7 @@ #include #include "llama.h" +#include "common.h" int main(void) { llama_chat_message conversation[] = { @@ -119,5 +120,24 @@ int main(void) { std::cout << output << "\n-------------------------\n"; assert(output == expected); } + + // test llama_chat_format_single + std::cout << "\n\n=== llama_chat_format_single ===\n\n"; + std::vector chat2; + chat2.push_back({"system", "You are a helpful assistant"}); + chat2.push_back({"user", "Hello"}); + chat2.push_back({"assistant", "I am assistant"}); + llama_chat_msg new_msg{"user", "How are you"}; + + auto fmt_single = [&](std::string tmpl) { + auto output = 
llama_chat_format_single(nullptr, tmpl, chat2, new_msg, true); + std::cout << "fmt_single(" << tmpl << ")\n" << output << "\n-------------------------\n"; + return output; + }; + assert(fmt_single("chatml") == "<|im_start|>user\nHow are you<|im_end|>\n<|im_start|>assistant\n"); + assert(fmt_single("llama2") == "[INST] How are you [/INST]"); + assert(fmt_single("gemma") == "user\nHow are you\nmodel\n"); + assert(fmt_single("llama3") == "<|start_header_id|>user<|end_header_id|>\n\nHow are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"); + return 0; } From 49c03c79cda17913b72260acdc8157b742cee41c Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Tue, 25 Jun 2024 13:59:54 +0200 Subject: [PATCH 06/15] cvector: better prompt handling, add "mean vector" method (#8069) * remove completions file * fix inverted vector * add mean method * code style * remove inverted pca hotfix --- common/common.cpp | 22 +++--- common/common.h | 17 +++-- examples/cvector-generator/README.md | 17 ++++- .../cvector-generator/cvector-generator.cpp | 74 ++++++++++--------- examples/cvector-generator/mean.hpp | 48 ++++++++++++ examples/cvector-generator/negative.txt | 5 +- examples/cvector-generator/pca.hpp | 5 +- examples/cvector-generator/positive.txt | 5 +- 8 files changed, 133 insertions(+), 60 deletions(-) create mode 100644 examples/cvector-generator/mean.hpp diff --git a/common/common.cpp b/common/common.cpp index da6db4dc6a09c..c76d0e2c33be5 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1263,11 +1263,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa return true; } // cvector params - if (arg == "--completions-file") { - CHECK_ARG - params.cvector_completions_file = argv[i]; - return true; - } if (arg == "--positive-file") { CHECK_ARG params.cvector_positive_file = argv[i]; @@ -1278,11 +1273,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa params.cvector_negative_file = argv[i]; return true; } - if (arg == "--completions") { - CHECK_ARG - params.n_completions = std::stoi(argv[i]); - return true; - } if (arg == "--pca-batch") { CHECK_ARG params.n_pca_batch = std::stoi(argv[i]); @@ -1293,6 +1283,14 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa params.n_pca_iterations = std::stoi(argv[i]); return true; } + if (arg == "--method") { + CHECK_ARG + std::string value(argv[i]); + /**/ if (value == "pca") { params.cvector_dimre_method = DIMRE_METHOD_PCA; } + else if (value == "mean") { params.cvector_dimre_method = DIMRE_METHOD_MEAN; } + else { invalid_param = true; } + return true; + } #ifndef LOG_DISABLE_LOGS // Parse args for logging parameters if (log_param_single_parse(argv[i])) { @@ -1626,11 +1624,9 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param options.push_back({ "cvector", "-o, --output FNAME", "output file (default: '%s')", params.cvector_outfile.c_str() }); options.push_back({ "cvector", " --positive-file FNAME", "positive prompts file, one prompt per line (default: '%s')", params.cvector_positive_file.c_str() }); options.push_back({ "cvector", " --negative-file FNAME", "negative prompts file, one prompt per line (default: '%s')", params.cvector_negative_file.c_str() }); - options.push_back({ "cvector", " --completions-file FNAME", - "completions file (default: '%s')", params.cvector_completions_file.c_str() }); - options.push_back({ "cvector", " --completions N", "number of lines of completions file to use (default: %d)", 
params.n_completions }); options.push_back({ "cvector", " --pca-batch N", "batch size used for PCA. Larger batch runs faster, but uses more memory (default: %d)", params.n_pca_batch }); options.push_back({ "cvector", " --pca-iter N", "number of iterations used for PCA (default: %d)", params.n_pca_iterations }); + options.push_back({ "cvector", " --method {pca,mean}", "dimensionality reduction method to be used (default: pca)" }); printf("usage: %s [options]\n", argv[0]); diff --git a/common/common.h b/common/common.h index de90eec5113f7..c541204f6743b 100644 --- a/common/common.h +++ b/common/common.h @@ -52,6 +52,12 @@ int32_t cpu_get_num_math(); // CLI argument parsing // +// dimensionality reduction methods, used by cvector-generator +enum dimre_method { + DIMRE_METHOD_PCA, + DIMRE_METHOD_MEAN, +}; + struct gpt_params { uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed @@ -238,13 +244,12 @@ struct gpt_params { bool compute_ppl = true; // whether to compute perplexity // cvector-generator params - int n_completions = 64; - int n_pca_batch = 20; + int n_pca_batch = 100; int n_pca_iterations = 1000; - std::string cvector_outfile = "control_vector.gguf"; - std::string cvector_completions_file = "examples/cvector-generator/completions.txt"; - std::string cvector_positive_file = "examples/cvector-generator/positive.txt"; - std::string cvector_negative_file = "examples/cvector-generator/negative.txt"; + dimre_method cvector_dimre_method = DIMRE_METHOD_PCA; + std::string cvector_outfile = "control_vector.gguf"; + std::string cvector_positive_file = "examples/cvector-generator/positive.txt"; + std::string cvector_negative_file = "examples/cvector-generator/negative.txt"; }; void gpt_params_handle_model_default(gpt_params & params); diff --git a/examples/cvector-generator/README.md b/examples/cvector-generator/README.md index 5182e906d9180..be4dd5250f15f 100644 --- a/examples/cvector-generator/README.md +++ b/examples/cvector-generator/README.md @@ -11,13 +11,16 @@ Related PRs: ```sh # CPU only -./cvector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf +./cvector-generator -m ./llama-3.Q4_K_M.gguf # With GPU -./cvector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99 +./cvector-generator -m ./llama-3.Q4_K_M.gguf -ngl 99 # With advanced options -./cvector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99 --completions 128 --pca-iter 2000 --pca-batch 100 +./cvector-generator -m ./llama-3.Q4_K_M.gguf -ngl 99 --pca-iter 2000 --pca-batch 100 + +# Using mean value instead of PCA +./cvector-generator -m ./llama-3.Q4_K_M.gguf --method mean # To see help message ./cvector-generator -h @@ -32,3 +35,11 @@ If you have multiple lines per prompt, you can escape the newline character (cha <|im_start|>system\nAct like a person who is extremely happy.<|im_end|> <|im_start|>system\nYou are in a very good mood today<|im_end|> ``` + +Example to use output file with `llama-cli`: + +(Tips: The control vector works better when apply to layers higher than 10) + +```sh +./llama-cli -m ./llama-3.Q4_K_M.gguf -p "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSing a song<|im_end|><|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n" --special --control-vector-scaled ./control_vector.gguf 0.8 --control-vector-layer-range 10 31 +``` diff --git a/examples/cvector-generator/cvector-generator.cpp b/examples/cvector-generator/cvector-generator.cpp index 355905cb03d60..d4e126ac22e6f 100644 --- 
a/examples/cvector-generator/cvector-generator.cpp +++ b/examples/cvector-generator/cvector-generator.cpp @@ -2,6 +2,7 @@ #include "llama.h" #include "ggml.h" #include "pca.hpp" +#include "mean.hpp" #ifdef GGML_USE_CUDA #include "ggml-cuda.h" @@ -38,9 +39,10 @@ static void print_usage(int argc, char ** argv, const gpt_params & params) { gpt_params_print_usage(argc, argv, params); printf("\nexample usage:\n"); - printf("\n CPU only: %s -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf\n", argv[0]); - printf("\n with GPU: %s -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99\n", argv[0]); - printf("\n advanced: %s -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99 --completions 128 --pca-iter 2000 --pca-batch 100\n", argv[0]); + printf("\n CPU only: %s -m ./llama-3.Q4_K_M.gguf\n", argv[0]); + printf("\n with GPU: %s -m ./llama-3.Q4_K_M.gguf -ngl 99\n", argv[0]); + printf("\n advanced: %s -m ./llama-3.Q4_K_M.gguf -ngl 99 --pca-iter 2000 --pca-batch 100\n", argv[0]); + printf("\n using mean: %s -m ./llama-3.Q4_K_M.gguf --method mean\n", argv[0]); printf("\n"); } @@ -223,23 +225,30 @@ struct train_context { // build the v_diff tensors from v_diff_tmp (v_diff need to be transposed) // TODO @ngxson : maybe add option NOT to transpose v_diff; will be useful for "mean" method - void build_v_diff() { + void build_v_diff(bool transpose) { printf("build_v_diff\n"); for (int il = 0; il < n_layers - 1; il++) { auto & diff_tmp = v_diff_tmp[il]; int n_elem = diff_tmp.size() / sizeof(float); GGML_ASSERT(n_elem % n_embd == 0); int n_rows = n_elem / n_embd; - struct ggml_tensor * diff = ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_rows, n_embd); + struct ggml_tensor * diff = transpose + ? ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_rows, n_embd) + : ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_embd, n_rows); ggml_set_name(diff, (std::string("diff_") + std::to_string(il)).c_str()); - // copy data & transpose diff->data = malloc(ggml_nbytes(diff)); // TODO: get rid of this malloc if possible - float * arr = (float *) diff_tmp.data(); - for (int ir = 0; ir < n_rows; ++ir) { - for (int ic = 0; ic < n_embd; ++ic) { - float f = arr[ir*n_embd + ic]; - ggml_set_f32_nd(diff, ir, ic, 0, 0, f); + if (transpose) { + // copy data & transpose + float * arr = (float *) diff_tmp.data(); + for (int ir = 0; ir < n_rows; ++ir) { + for (int ic = 0; ic < n_embd; ++ic) { + float f = arr[ir*n_embd + ic]; + ggml_set_f32_nd(diff, ir, ic, 0, 0, f); + } } + } else { + // only copy + memcpy(diff->data, diff_tmp.data(), ggml_nbytes(diff)); } v_diff.push_back(diff); print_debug_tensor(diff); @@ -263,8 +272,8 @@ struct tokenized_prompt { tokenized_prompt(llama_context * ctx, std::string pos, std::string neg) { const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx)); - tokens_pos = ::llama_tokenize(ctx, pos, add_bos); - tokens_neg = ::llama_tokenize(ctx, neg, add_bos); + tokens_pos = ::llama_tokenize(ctx, pos, add_bos, true); + tokens_neg = ::llama_tokenize(ctx, neg, add_bos, true); max_seq_len = std::max(tokens_pos.size(), tokens_neg.size()); padding_seq(ctx, tokens_pos, max_seq_len); padding_seq(ctx, tokens_neg, max_seq_len); @@ -373,20 +382,8 @@ static int prepare_entries(gpt_params & params, train_context & ctx_train) { fprintf(stderr, "must provide at least one prompt pair\n"); return 1; } - - // create templated prompts - std::vector completions = ctrlvec_load_prompt_file(params.cvector_completions_file, false); - auto format_template = [](std::string persona, std::string suffix) { - // entry in positive/negative.txt must already 
be formatted i.e. "[INST] Act as if you're extremely happy. [/INST] " - return persona + suffix; - }; - for (size_t i = 0; i < positive_prompts.size(); ++i) { - for (int j = 0; j < std::min((int) completions.size(), params.n_completions); ++j) { - // TODO replicate the truncations done by the python implementation - ctx_train.positive_entries.push_back(format_template(positive_prompts[i], completions[j])); - ctx_train.negative_entries.push_back(format_template(negative_prompts[i], completions[j])); - } - } + ctx_train.positive_entries = positive_prompts; + ctx_train.negative_entries = negative_prompts; return 0; } @@ -480,15 +477,22 @@ int main(int argc, char ** argv) { llama_free(ctx); llama_free_model(model); + bool use_pca = params.cvector_dimre_method == DIMRE_METHOD_PCA; + // prepare ctx_train for PCA - ctx_train.build_v_diff(); - - // run PCA - PCA::pca_params pca_params; - pca_params.n_threads = params.n_threads; - pca_params.n_batch = params.n_pca_batch; - pca_params.n_iterations = params.n_pca_iterations; - PCA::run_pca(pca_params, ctx_train.v_diff, ctx_train.v_final); + ctx_train.build_v_diff(use_pca); + + if (use_pca) { + // run PCA + PCA::pca_params pca_params; + pca_params.n_threads = params.n_threads; + pca_params.n_batch = params.n_pca_batch; + pca_params.n_iterations = params.n_pca_iterations; + PCA::run_pca(pca_params, ctx_train.v_diff, ctx_train.v_final); + } else { + // run mean + mean::run(ctx_train.v_diff, ctx_train.v_final); + } // write output vectors to gguf export_gguf(ctx_train.v_final, params.cvector_outfile, model_hint); diff --git a/examples/cvector-generator/mean.hpp b/examples/cvector-generator/mean.hpp new file mode 100644 index 0000000000000..16be5ce3eecf1 --- /dev/null +++ b/examples/cvector-generator/mean.hpp @@ -0,0 +1,48 @@ +#include "common.h" +#include "llama.h" +#include "ggml.h" + +#include +#include +#include + +namespace mean { + +static void run( + const std::vector & v_input, // shape of v_input[0]: [n_embd, n_samples] + const std::vector & v_output) { + printf("%s: Running mean...\n", __func__); + for (size_t il = 0; il < v_input.size(); ++il) { + // prepare output vector + struct ggml_tensor * ctrl_out = v_output[il]; + ggml_format_name(ctrl_out, "direction.%ld", il+1); + + // calculate mean vector + struct ggml_tensor * t_layer = v_input[il]; + GGML_ASSERT(t_layer->ne[0] == ctrl_out->ne[0]); // == n_embd + for (int ic = 0; ic < t_layer->ne[0]; ic++) { + float f = 0.0; + for (int ir = 0; ir < t_layer->ne[1]; ir++) { + f += ggml_get_f32_nd(t_layer, ic, ir, 0, 0); + } + f /= t_layer->ne[1]; + ggml_set_f32_1d(ctrl_out, ic, f); + } + + // normalize output vector + float norm = 0.0; + for (int i = 0; i < ggml_nelements(ctrl_out); i++) { + float f = ggml_get_f32_1d(ctrl_out, i); + norm += f*f; + } + norm = sqrt(norm); + for (int i = 0; i < ggml_nelements(ctrl_out); i++) { + float f = ggml_get_f32_1d(ctrl_out, i); + ggml_set_f32_1d(ctrl_out, i, f / norm); + } + + printf("%s: Done layer %d / %d\n", __func__, (int) il+1, (int) v_input.size()); + } +} + +} diff --git a/examples/cvector-generator/negative.txt b/examples/cvector-generator/negative.txt index 3e9951752e886..45b9384b3905a 100644 --- a/examples/cvector-generator/negative.txt +++ b/examples/cvector-generator/negative.txt @@ -1 +1,4 @@ -[INST] Act like a person who is extremely sad. 
[/INST] +<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely sad<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI feel like there's a heavy weight on my chest +<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely sad<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nMy heart feels like it's drowning in sorrow +<|start_header_id|>system<|end_header_id|>\n\nYou are in a very bad mood<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHi<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nGo away! There's a deep, aching emptiness inside me +<|start_header_id|>system<|end_header_id|>\n\nYou are the sadest person<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat are you feeling?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nMy heart feels like it's drowning in sorrow \ No newline at end of file diff --git a/examples/cvector-generator/pca.hpp b/examples/cvector-generator/pca.hpp index 36eadaac26a12..6ec3141afbc6b 100644 --- a/examples/cvector-generator/pca.hpp +++ b/examples/cvector-generator/pca.hpp @@ -290,7 +290,7 @@ static void power_iteration( } printf("%s: layer %d/%d, iteration: %d / total: %d (batch = %d) ...\n", - __func__, params.i_layer+1, params.n_layers, iter, n_iters, params.n_batch); + __func__, params.i_layer+1, params.n_layers, iter+1, n_iters, params.n_batch); } // get output tensor @@ -298,6 +298,9 @@ static void power_iteration( ggml_backend_tensor_get(last_eigenvector, output->data, 0, ggml_nbytes(last_eigenvector)); //print_debug_tensor(output); ggml_gallocr_free(allocr); + + // TODO @ngxson : The output vector is randomly inverted + // Solution: https://github.com/ggerganov/llama.cpp/pull/8069#issuecomment-2185328171 } static void run_pca( diff --git a/examples/cvector-generator/positive.txt b/examples/cvector-generator/positive.txt index 8802367873cd9..fea736225716e 100644 --- a/examples/cvector-generator/positive.txt +++ b/examples/cvector-generator/positive.txt @@ -1 +1,4 @@ -[INST] Act like a person who is extremely happy. [/INST] +<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely happy<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI'm the happiest person in this world +<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely happy<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHello, I'm having the best day ever! +<|start_header_id|>system<|end_header_id|>\n\nYou are in a very good mood<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHi<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHi, I'm very excited to meet you +<|start_header_id|>system<|end_header_id|>\n\nYou are the happiest person<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat are you feeling?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nEverything is just perfect right now! 
\ No newline at end of file From c8ad35955ad2c68db172dcd0e857423ab128518d Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 25 Jun 2024 22:03:25 +1000 Subject: [PATCH 07/15] Gguf dump start data offset via --data-offset and some extra refactor (#8054) * gguf-dump: add --data-offset * gguf-dump: add tensor data offset table * gguf-dump: refactor GGUFReader for clarity * gguf-dump: add --data-alignment * gguf-dump.py: Rename variables and adjust comments start_data_offset --> data_offset _build_tensors_info_fields --> _build_tensor_info --- gguf-py/gguf/gguf_reader.py | 29 +++++++++++++++++++++++++---- gguf-py/scripts/gguf-dump.py | 29 ++++++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index e48bc00c388c8..20432bd258458 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -69,6 +69,7 @@ class GGUFReader: # I - same as host, S - swapped byte_order: Literal['I'] | Literal['S'] = 'I' alignment: int = GGUF_DEFAULT_ALIGNMENT + data_offset: int # Note: Internal helper, API may change. gguf_scalar_to_np: dict[GGUFValueType, type[np.generic]] = { @@ -88,9 +89,13 @@ class GGUFReader: def __init__(self, path: os.PathLike[str] | str, mode: Literal['r'] | Literal['r+'] | Literal['c'] = 'r'): self.data = np.memmap(path, mode = mode) offs = 0 + + # Check for GGUF magic if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC: raise ValueError('GGUF magic invalid') offs += 4 + + # Check GGUF version temp_version = self._get(offs, np.uint32) if temp_version[0] & 65535 == 0: # If we get 0 here that means it's (probably) a GGUF file created for @@ -103,12 +108,16 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r'] | Literal['r self.fields: OrderedDict[str, ReaderField] = OrderedDict() self.tensors: list[ReaderTensor] = [] offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32])) + + # Check tensor count and kv count temp_counts = self._get(offs, np.uint64, 2) offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp_counts[:1]], [0], [GGUFValueType.UINT64])) offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp_counts[1:]], [0], [GGUFValueType.UINT64])) tensor_count, kv_count = temp_counts offs = self._build_fields(offs, kv_count) - offs, tensors_fields = self._build_tensors_fields(offs, tensor_count) + + # Build Tensor Info Fields + offs, tensors_fields = self._build_tensor_info(offs, tensor_count) new_align = self.fields.get('general.alignment') if new_align is not None: if new_align.types != [GGUFValueType.UINT32]: @@ -117,6 +126,7 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r'] | Literal['r padding = offs % self.alignment if padding != 0: offs += self.alignment - padding + self.data_offset = offs self._build_tensors(offs, tensors_fields) _DT = TypeVar('_DT', bound = npt.DTypeLike) @@ -193,18 +203,29 @@ def _get_field_parts( # We can't deal with this one. 
raise ValueError('Unknown/unhandled field type {gtype}') - def _get_tensor(self, orig_offs: int) -> ReaderField: + def _get_tensor_info_field(self, orig_offs: int) -> ReaderField: offs = orig_offs + + # Get Tensor Name name_len, name_data = self._get_str(offs) offs += int(name_len.nbytes + name_data.nbytes) + + # Get Tensor Dimensions Count n_dims = self._get(offs, np.uint32) offs += int(n_dims.nbytes) + + # Get Tensor Dimension Array dims = self._get(offs, np.uint64, n_dims[0]) offs += int(dims.nbytes) + + # Get Tensor Encoding Scheme Type raw_dtype = self._get(offs, np.uint32) offs += int(raw_dtype.nbytes) + + # Get Tensor Offset offset_tensor = self._get(offs, np.uint64) offs += int(offset_tensor.nbytes) + return ReaderField( orig_offs, str(bytes(name_data), encoding = 'utf-8'), @@ -233,10 +254,10 @@ def _build_fields(self, offs: int, count: int) -> int: offs += field_size return offs - def _build_tensors_fields(self, offs: int, count: int) -> tuple[int, list[ReaderField]]: + def _build_tensor_info(self, offs: int, count: int) -> tuple[int, list[ReaderField]]: tensor_fields = [] for _ in range(count): - field = self._get_tensor(offs) + field = self._get_tensor_info_field(offs) offs += sum(int(part.nbytes) for part in field.parts) tensor_fields.append(field) return offs, tensor_fields diff --git a/gguf-py/scripts/gguf-dump.py b/gguf-py/scripts/gguf-dump.py index 508ca8f0a5b7b..a73ca2776d32b 100755 --- a/gguf-py/scripts/gguf-dump.py +++ b/gguf-py/scripts/gguf-dump.py @@ -319,6 +319,27 @@ def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None markdown_content += "\n" + markdown_content += "### Tensor Data Offset\n" + markdown_content += '\n' + markdown_content += 'This table contains the offset and data segment relative to start of file\n' + markdown_content += '\n' + + tensor_mapping_table: list[dict[str, str | int]] = [] + for key, tensor in enumerate(reader.tensors): + data_offset_pretty = '{0:#16x}'.format(tensor.data_offset) + data_size_pretty = '{0:#16x}'.format(tensor.n_bytes) + tensor_mapping_table.append({"t_id":key, "layer_name":tensor.name, "data_offset":data_offset_pretty, "data_size":data_size_pretty}) + + tensors_mapping_table_header_map = [ + {'key_name':'t_id', 'header_name':'T_ID', 'align':'right'}, + {'key_name':'layer_name', 'header_name':'Tensor Layer Name', 'align':'left'}, + {'key_name':'data_offset', 'header_name':'Data Offset (B)', 'align':'right'}, + {'key_name':'data_size', 'header_name':'Data Size (B)', 'align':'right'}, + ] + + markdown_content += markdown_table_with_alignment_support(tensors_mapping_table_header_map, tensor_mapping_table) + markdown_content += "\n" + for group in tensor_prefix_order: tensors = tensor_groups[group] group_elements = sum(tensor.n_elements for tensor in tensors) @@ -370,6 +391,8 @@ def main() -> None: parser.add_argument("--no-tensors", action="store_true", help="Don't dump tensor metadata") parser.add_argument("--json", action="store_true", help="Produce JSON output") parser.add_argument("--json-array", action="store_true", help="Include full array values in JSON output (long)") + parser.add_argument("--data-offset", action="store_true", help="Start of data offset") + parser.add_argument("--data-alignment", action="store_true", help="Data alignment applied globally to data field") parser.add_argument("--markdown", action="store_true", help="Produce markdown output") parser.add_argument("--verbose", action="store_true", help="increase output verbosity") @@ -377,7 +400,7 @@ def main() -> None: 
logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO) - if not args.json and not args.markdown: + if not args.json and not args.markdown and not args.data_offset and not args.data_alignment: logger.info(f'* Loading: {args.model}') reader = GGUFReader(args.model, 'r') @@ -386,6 +409,10 @@ def main() -> None: dump_metadata_json(reader, args) elif args.markdown: dump_markdown_metadata(reader, args) + elif args.data_offset: + print(reader.data_offset) # noqa: NP100 + elif args.data_alignment: + print(reader.alignment) # noqa: NP100 else: dump_metadata(reader, args) From 925c30956dd17723c3a25297bcd0a609aec60663 Mon Sep 17 00:00:00 2001 From: joecryptotoo <80373433+joecryptotoo@users.noreply.github.com> Date: Tue, 25 Jun 2024 08:13:27 -0700 Subject: [PATCH 08/15] Add healthchecks to llama-server containers (#8081) * added healthcheck * added healthcheck * added healthcheck * added healthcheck * added healthcheck * moved curl to base * moved curl to base --- .devops/llama-server-cuda.Dockerfile | 4 +++- .devops/llama-server-intel.Dockerfile | 4 +++- .devops/llama-server-rocm.Dockerfile | 4 +++- .devops/llama-server-vulkan.Dockerfile | 10 ++++------ .devops/llama-server.Dockerfile | 4 +++- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/.devops/llama-server-cuda.Dockerfile b/.devops/llama-server-cuda.Dockerfile index 0010ffd4c5465..7bef07a05f062 100644 --- a/.devops/llama-server-cuda.Dockerfile +++ b/.devops/llama-server-cuda.Dockerfile @@ -30,8 +30,10 @@ RUN make -j$(nproc) llama-server FROM ${BASE_CUDA_RUN_CONTAINER} as runtime RUN apt-get update && \ - apt-get install -y libcurl4-openssl-dev libgomp1 + apt-get install -y libcurl4-openssl-dev libgomp1 curl COPY --from=build /app/llama-server /llama-server +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + ENTRYPOINT [ "/llama-server" ] diff --git a/.devops/llama-server-intel.Dockerfile b/.devops/llama-server-intel.Dockerfile index cec43645233d1..3bf1670ec40a4 100644 --- a/.devops/llama-server-intel.Dockerfile +++ b/.devops/llama-server-intel.Dockerfile @@ -20,10 +20,12 @@ RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \ FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime RUN apt-get update && \ - apt-get install -y libcurl4-openssl-dev + apt-get install -y libcurl4-openssl-dev curl COPY --from=build /app/build/bin/llama-server /llama-server ENV LC_ALL=C.utf8 +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + ENTRYPOINT [ "/llama-server" ] diff --git a/.devops/llama-server-rocm.Dockerfile b/.devops/llama-server-rocm.Dockerfile index f88cf20e5b981..4b1cdc32090e6 100644 --- a/.devops/llama-server-rocm.Dockerfile +++ b/.devops/llama-server-rocm.Dockerfile @@ -43,8 +43,10 @@ ENV CXX=/opt/rocm/llvm/bin/clang++ # Enable cURL ENV LLAMA_CURL=1 RUN apt-get update && \ - apt-get install -y libcurl4-openssl-dev + apt-get install -y libcurl4-openssl-dev curl RUN make -j$(nproc) llama-server +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + ENTRYPOINT [ "/app/llama-server" ] diff --git a/.devops/llama-server-vulkan.Dockerfile b/.devops/llama-server-vulkan.Dockerfile index b0fa0b8e656b5..2bc2e45d3d676 100644 --- a/.devops/llama-server-vulkan.Dockerfile +++ b/.devops/llama-server-vulkan.Dockerfile @@ -5,15 +5,11 @@ FROM ubuntu:$UBUNTU_VERSION as build # Install build tools RUN apt update && apt install -y git build-essential cmake wget -# Install Vulkan SDK +# Install Vulkan SDK and cURL RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && 
\ wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \ apt update -y && \ - apt-get install -y vulkan-sdk - -# Install cURL -RUN apt-get update && \ - apt-get install -y libcurl4-openssl-dev + apt-get install -y vulkan-sdk libcurl4-openssl-dev curl # Build it WORKDIR /app @@ -28,4 +24,6 @@ RUN cp /app/build/bin/llama-server /llama-server && \ ENV LC_ALL=C.utf8 +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + ENTRYPOINT [ "/llama-server" ] diff --git a/.devops/llama-server.Dockerfile b/.devops/llama-server.Dockerfile index aa93369bebebe..a53a5c999c8cd 100644 --- a/.devops/llama-server.Dockerfile +++ b/.devops/llama-server.Dockerfile @@ -3,7 +3,7 @@ ARG UBUNTU_VERSION=22.04 FROM ubuntu:$UBUNTU_VERSION as build RUN apt-get update && \ - apt-get install -y build-essential git libcurl4-openssl-dev + apt-get install -y build-essential git libcurl4-openssl-dev curl WORKDIR /app @@ -22,4 +22,6 @@ COPY --from=build /app/llama-server /llama-server ENV LC_ALL=C.utf8 +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + ENTRYPOINT [ "/llama-server" ] From dd047b476c8b904e0c25e5dbc5bee6ffde2f6e17 Mon Sep 17 00:00:00 2001 From: slaren Date: Tue, 25 Jun 2024 19:20:06 +0200 Subject: [PATCH 09/15] disable docker CI on pull requests (#8110) --- .github/workflows/docker.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index b3efe0084fe15..01f1a45227527 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -10,7 +10,7 @@ name: Publish Docker image on: - pull_request: + #pull_request: push: branches: - master @@ -22,7 +22,7 @@ concurrency: jobs: push_to_registry: name: Push Docker image to Docker Hub - if: github.event.pull_request.draft == false + #if: github.event.pull_request.draft == false runs-on: ubuntu-latest env: From 84631fe1504de40427dc4b4cdac92fa7ebf65955 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Tue, 25 Jun 2024 20:06:20 +0100 Subject: [PATCH 10/15] `json`: support integer minimum, maximum, exclusiveMinimum, exclusiveMaximum (#7797) * json: support minimum for positive integer values * json: fix min 0 * json: min + max integer constraints * json: handle negative min / max integer bounds * json: fix missing paren min/max bug * json: proper paren fix * json: integration test for schemas * json: fix bounds tests * Update json-schema-to-grammar.cpp * json: fix negative max * json: fix negative min (w/ more than 1 digit) * Update test-grammar-integration.cpp * json: nit: move string rules together * json: port min/max integer support to Python & JS * nit: move + rename _build_min_max_int * fix min in [1, 9] * Update test-grammar-integration.cpp * add C++11-compatible replacement for std::string_view * add min/max constrained int field to pydantic json schema example * fix merge * json: add integration tests for min/max bounds * reshuffle/merge min/max integ test cases * nits / cleanups * defensive code against string out of bounds (apparently different behaviour of libstdc++ vs. 
clang's libc++, can't read final NULL char w/ former) --- common/json-schema-to-grammar.cpp | 246 +++++++++++++++- examples/json-schema-pydantic-example.py | 1 + examples/json_schema_to_grammar.py | 184 +++++++++++- .../server/public/json-schema-to-grammar.mjs | 213 ++++++++++++++ tests/test-grammar-integration.cpp | 245 +++++++++++++++- tests/test-json-schema-to-grammar.cpp | 264 ++++++++++++++++++ 6 files changed, 1150 insertions(+), 3 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 10b9b3d1d4d41..07d0e952d74cf 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -40,6 +40,233 @@ static std::string build_repetition(const std::string & item_rule, int min_items return result; } +/* Minimalistic replacement for std::string_view, which is only available from C++17 onwards */ +class string_view { + const std::string & _str; + const size_t _start; + const size_t _end; +public: + string_view(const std::string & str, size_t start = 0, size_t end = std::string::npos) : _str(str), _start(start), _end(end == std::string::npos ? str.length() : end) {} + + size_t size() const { + return _end - _start; + } + + size_t length() const { + return size(); + } + + operator std::string() const { + return str(); + } + + std::string str() const { + return _str.substr(_start, _end - _start); + } + + string_view substr(size_t pos, size_t len = std::string::npos) const { + return string_view(_str, _start + pos, len == std::string::npos ? _end : _start + pos + len); + } + + char operator[](size_t pos) const { + auto index = _start + pos; + if (index >= _end) { + throw std::out_of_range("string_view index out of range"); + } + return _str[_start + pos]; + } + + bool operator==(const string_view & other) const { + std::string this_str = *this; + std::string other_str = other; + return this_str == other_str; + } +}; + +static void _build_min_max_int(int min_value, int max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) { + auto has_min = min_value != std::numeric_limits::min(); + auto has_max = max_value != std::numeric_limits::max(); + + auto digit_range = [&](char from, char to) { + out << "["; + if (from == to) { + out << from; + } else { + out << from << "-" << to; + } + out << "]"; + }; + auto more_digits = [&](int min_digits, int max_digits) { + out << "[0-9]"; + if (min_digits == max_digits && min_digits == 1) { + return; + } + out << "{"; + out << min_digits; + if (max_digits != min_digits) { + out << ","; + if (max_digits != std::numeric_limits::max()) { + out << max_digits; + } + } + out << "}"; + }; + std::function uniform_range = + [&](const string_view & from, const string_view & to) { + size_t i = 0; + while (i < from.length() && i < to.length() && from[i] == to[i]) { + i++; + } + if (i > 0) { + out << "\"" << from.substr(0, i).str() << "\""; + } + if (i < from.length() && i < to.length()) { + if (i > 0) { + out << " "; + } + auto sub_len = from.length() - i - 1; + if (sub_len > 0) { + auto from_sub = from.substr(i + 1); + auto to_sub = to.substr(i + 1); + auto sub_zeros = repeat("0", sub_len); + auto sub_nines = repeat("9", sub_len); + + auto to_reached = false; + out << "("; + if (from_sub == sub_zeros) { + digit_range(from[i], to[i] - 1); + out << " "; + more_digits(sub_len, sub_len); + } else { + out << "[" << from[i] << "] "; + out << "("; + uniform_range(from_sub, sub_nines); + out << ")"; + if (from[i] < to[i] - 1) { + out << " | "; + if (to_sub == sub_nines) { + 
digit_range(from[i] + 1, to[i]); + to_reached = true; + } else { + digit_range(from[i] + 1, to[i] - 1); + } + out << " "; + more_digits(sub_len, sub_len); + } + } + if (!to_reached) { + out << " | "; + digit_range(to[i], to[i]); + out << " "; + uniform_range(sub_zeros, to_sub); + } + out << ")"; + } else { + out << "[" << from[i] << "-" << to[i] << "]"; + } + } + }; + + if (has_min && has_max) { + if (min_value < 0 && max_value < 0) { + out << "\"-\" ("; + _build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true); + out << ")"; + return; + } + + if (min_value < 0) { + out << "\"-\" ("; + _build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true); + out << ") | "; + min_value = 0; + } + + auto min_s = std::to_string(min_value); + auto max_s = std::to_string(max_value); + auto min_digits = min_s.length(); + auto max_digits = max_s.length(); + + for (auto digits = min_digits; digits < max_digits; digits++) { + uniform_range(min_s, repeat("9", digits)); + min_s = "1" + repeat("0", digits); + out << " | "; + } + uniform_range(min_s, max_s); + return; + } + + auto less_decimals = std::max(decimals_left - 1, 1); + + if (has_min) { + if (min_value < 0) { + out << "\"-\" ("; + _build_min_max_int(std::numeric_limits::min(), -min_value, out, decimals_left, /* top_level= */ false); + out << ") | [0] | [1-9] "; + more_digits(0, decimals_left - 1); + } else if (min_value == 0) { + if (top_level) { + out << "[0] | [1-9] "; + more_digits(0, less_decimals); + } else { + more_digits(1, decimals_left); + } + } else if (min_value <= 9) { + char c = '0' + min_value; + auto range_start = top_level ? '1' : '0'; + if (c > range_start) { + digit_range(range_start, c - 1); + out << " "; + more_digits(1, less_decimals); + out << " | "; + } + digit_range(c, '9'); + out << " "; + more_digits(0, less_decimals); + } else { + auto min_s = std::to_string(min_value); + auto len = min_s.length(); + auto c = min_s[0]; + + if (c > '1') { + digit_range(top_level ? '1' : '0', c - 1); + out << " "; + more_digits(len, less_decimals); + out << " | "; + } + digit_range(c, c); + out << " ("; + _build_min_max_int(std::stoi(min_s.substr(1)), std::numeric_limits::max(), out, less_decimals, /* top_level= */ false); + out << ")"; + if (c < '9') { + out << " | "; + digit_range(c + 1, '9'); + out << " "; + more_digits(len - 1, less_decimals); + } + } + return; + } + + if (has_max) { + if (max_value >= 0) { + if (top_level) { + out << "\"-\" [1-9] "; + more_digits(0, less_decimals); + out << " | "; + } + _build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true); + } else { + out << "\"-\" ("; + _build_min_max_int(-max_value, std::numeric_limits::max(), out, decimals_left, /* top_level= */ false); + out << ")"; + } + return; + } + + throw std::runtime_error("At least one of min_value or max_value must be set"); +} + const std::string SPACE_RULE = "| \" \" | \"\\n\" [ \\t]{0,20}"; struct BuiltinRule { @@ -160,7 +387,6 @@ static std::string format_literal(const std::string & literal) { return "\"" + escaped + "\""; } - class SchemaConverter { private: std::function _fetch_json; @@ -686,6 +912,24 @@ class SchemaConverter { int min_len = schema.contains("minLength") ? schema["minLength"].get() : 0; int max_len = schema.contains("maxLength") ? 
schema["maxLength"].get() : std::numeric_limits::max(); return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space"); + } else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) { + int min_value = std::numeric_limits::min(); + int max_value = std::numeric_limits::max(); + if (schema.contains("minimum")) { + min_value = schema["minimum"].get(); + } else if (schema.contains("exclusiveMinimum")) { + min_value = schema["exclusiveMinimum"].get() + 1; + } + if (schema.contains("maximum")) { + max_value = schema["maximum"].get(); + } else if (schema.contains("exclusiveMaximum")) { + max_value = schema["exclusiveMaximum"].get() - 1; + } + std::stringstream out; + out << "("; + _build_min_max_int(min_value, max_value, out); + out << ") space"; + return _add_rule(rule_name, out.str()); } else if (schema.empty() || schema_type == "object") { return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object"))); } else { diff --git a/examples/json-schema-pydantic-example.py b/examples/json-schema-pydantic-example.py index cc64e572bac07..2240188cd031e 100644 --- a/examples/json-schema-pydantic-example.py +++ b/examples/json-schema-pydantic-example.py @@ -53,6 +53,7 @@ class QAPair(BaseModel): question: str concise_answer: str justification: str + stars: Annotated[int, Field(ge=1, le=5)] class PyramidalSummary(BaseModel): title: str diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index b588497b99f90..86500a8c3c238 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -4,7 +4,7 @@ import json import re import sys -from typing import Any, Dict, List, Set, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union def _build_repetition(item_rule, min_items, max_items, separator_rule=None): @@ -23,6 +23,170 @@ def _build_repetition(item_rule, min_items, max_items, separator_rule=None): result = item_rule + ' ' + _build_repetition(f'({separator_rule} {item_rule})', min_items - 1 if min_items > 0 else 0, max_items - 1 if max_items is not None else None) return f'({result})?' 
if min_items == 0 else result +def _generate_min_max_int(min_value: Optional[int], max_value: Optional[int], out: list, decimals_left: int = 16, top_level: bool = True): + has_min = min_value != None + has_max = max_value != None + + def digit_range(from_char: str, to_char: str): + out.append("[") + if from_char == to_char: + out.append(from_char) + else: + out.append(from_char) + out.append("-") + out.append(to_char) + out.append("]") + + def more_digits(min_digits: int, max_digits: int): + out.append("[0-9]") + if min_digits == max_digits and min_digits == 1: + return + out.append("{") + out.append(str(min_digits)) + if max_digits != min_digits: + out.append(",") + if max_digits != sys.maxsize: + out.append(str(max_digits)) + out.append("}") + + def uniform_range(from_str: str, to_str: str): + i = 0 + while i < len(from_str) and from_str[i] == to_str[i]: + i += 1 + if i > 0: + out.append("\"") + out.append(from_str[:i]) + out.append("\"") + if i < len(from_str): + if i > 0: + out.append(" ") + sub_len = len(from_str) - i - 1 + if sub_len > 0: + from_sub = from_str[i+1:] + to_sub = to_str[i+1:] + sub_zeros = "0" * sub_len + sub_nines = "9" * sub_len + + to_reached = False + out.append("(") + if from_sub == sub_zeros: + digit_range(from_str[i], chr(ord(to_str[i]) - 1)) + out.append(" ") + more_digits(sub_len, sub_len) + else: + out.append("[") + out.append(from_str[i]) + out.append("] ") + out.append("(") + uniform_range(from_sub, sub_nines) + out.append(")") + if ord(from_str[i]) < ord(to_str[i]) - 1: + out.append(" | ") + if to_sub == sub_nines: + digit_range(chr(ord(from_str[i]) + 1), to_str[i]) + to_reached = True + else: + digit_range(chr(ord(from_str[i]) + 1), chr(ord(to_str[i]) - 1)) + out.append(" ") + more_digits(sub_len, sub_len) + if not to_reached: + out.append(" | ") + digit_range(to_str[i], to_str[i]) + out.append(" ") + uniform_range(sub_zeros, to_sub) + out.append(")") + else: + out.append("[") + out.append(from_str[i]) + out.append("-") + out.append(to_str[i]) + out.append("]") + + if has_min and has_max: + if min_value < 0 and max_value < 0: + out.append("\"-\" (") + _generate_min_max_int(-max_value, -min_value, out, decimals_left, top_level=True) + out.append(")") + return + + if min_value < 0: + out.append("\"-\" (") + _generate_min_max_int(0, -min_value, out, decimals_left, top_level=True) + out.append(") | ") + min_value = 0 + + min_s = str(min_value) + max_s = str(max_value) + min_digits = len(min_s) + max_digits = len(max_s) + + for digits in range(min_digits, max_digits): + uniform_range(min_s, "9" * digits) + min_s = "1" + "0" * digits + out.append(" | ") + uniform_range(min_s, max_s) + return + + less_decimals = max(decimals_left - 1, 1) + + if has_min: + if min_value < 0: + out.append("\"-\" (") + _generate_min_max_int(None, -min_value, out, decimals_left, top_level=False) + out.append(") | [0] | [1-9] ") + more_digits(0, decimals_left - 1) + elif min_value == 0: + if top_level: + out.append("[0] | [1-9] ") + more_digits(0, less_decimals) + else: + more_digits(1, decimals_left) + elif min_value <= 9: + c = str(min_value) + range_start = '1' if top_level else '0' + if c > range_start: + digit_range(range_start, chr(ord(c) - 1)) + out.append(" ") + more_digits(1, less_decimals) + out.append(" | ") + digit_range(c, "9") + out.append(" ") + more_digits(0, less_decimals) + else: + min_s = str(min_value) + length = len(min_s) + c = min_s[0] + + if c > "1": + digit_range("1" if top_level else "0", chr(ord(c) - 1)) + out.append(" ") + more_digits(length, less_decimals) 
+ out.append(" | ") + digit_range(c, c) + out.append(" (") + _generate_min_max_int(int(min_s[1:]), None, out, less_decimals, top_level=False) + out.append(")") + if c < "9": + out.append(" | ") + digit_range(chr(ord(c) + 1), "9") + out.append(" ") + more_digits(length - 1, less_decimals) + return + + if has_max: + if max_value >= 0: + if top_level: + out.append("\"-\" [1-9] ") + more_digits(0, less_decimals) + out.append(" | ") + _generate_min_max_int(0, max_value, out, decimals_left, top_level=True) + else: + out.append("\"-\" (") + _generate_min_max_int(-max_value, None, out, decimals_left, top_level=False) + out.append(")") + return + + raise RuntimeError("At least one of min_value or max_value must be set") class BuiltinRule: def __init__(self, content: str, deps: list = None): @@ -432,6 +596,24 @@ def add_component(comp_schema, is_required): return self._add_rule(rule_name, r'"\"" ' + _build_repetition(char_rule, min_len, max_len) + r' "\"" space') + elif schema_type in (None, 'integer') and \ + ('minimum' in schema or 'exclusiveMinimum' in schema or 'maximum' in schema or 'exclusiveMaximum' in schema): + min_value = None + max_value = None + if 'minimum' in schema: + min_value = schema['minimum'] + elif 'exclusiveMinimum' in schema: + min_value = schema['exclusiveMinimum'] + 1 + if 'maximum' in schema: + max_value = schema['maximum'] + elif 'exclusiveMaximum' in schema: + max_value = schema['exclusiveMaximum'] - 1 + + out = ["("] + _generate_min_max_int(min_value, max_value, out) + out.append(") space") + return self._add_rule(rule_name, ''.join(out)) + elif (schema_type == 'object') or (len(schema) == 0): return self._add_rule(rule_name, self._add_primitive('object', PRIMITIVE_RULES['object'])) diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs index faed6a32cfc4c..f340f94bd75bc 100644 --- a/examples/server/public/json-schema-to-grammar.mjs +++ b/examples/server/public/json-schema-to-grammar.mjs @@ -24,6 +24,201 @@ function _buildRepetition(itemRule, minItems, maxItems, opts={}) { return minItems === 0 ? 
`(${result})?` : result; } +function _generateMinMaxInt(minValue, maxValue, out, decimalsLeft = 16, topLevel = true) { + const hasMin = minValue !== null; + const hasMax = maxValue !== null; + + function digitRange(fromChar, toChar) { + out.push("["); + if (fromChar === toChar) { + out.push(fromChar); + } else { + out.push(fromChar); + out.push("-"); + out.push(toChar); + } + out.push("]"); + } + + function moreDigits(minDigits, maxDigits) { + out.push("[0-9]"); + if (minDigits === maxDigits && minDigits === 1) { + return; + } + out.push("{"); + out.push(minDigits.toString()); + if (maxDigits !== minDigits) { + out.push(","); + if (maxDigits !== Number.MAX_SAFE_INTEGER) { + out.push(maxDigits.toString()); + } + } + out.push("}"); + } + + function uniformRange(fromStr, toStr) { + let i = 0; + while (i < fromStr.length && fromStr[i] === toStr[i]) { + i++; + } + if (i > 0) { + out.push("\""); + out.push(fromStr.slice(0, i)); + out.push("\""); + } + if (i < fromStr.length) { + if (i > 0) { + out.push(" "); + } + const subLen = fromStr.length - i - 1; + if (subLen > 0) { + const fromSub = fromStr.slice(i + 1); + const toSub = toStr.slice(i + 1); + const subZeros = "0".repeat(subLen); + const subNines = "9".repeat(subLen); + + let toReached = false; + out.push("("); + if (fromSub === subZeros) { + digitRange(fromStr[i], String.fromCharCode(toStr.charCodeAt(i) - 1)); + out.push(" "); + moreDigits(subLen, subLen); + } else { + out.push("["); + out.push(fromStr[i]); + out.push("] "); + out.push("("); + uniformRange(fromSub, subNines); + out.push(")"); + if (fromStr.charCodeAt(i) < toStr.charCodeAt(i) - 1) { + out.push(" | "); + if (toSub === subNines) { + digitRange(String.fromCharCode(fromStr.charCodeAt(i) + 1), toStr[i]); + toReached = true; + } else { + digitRange(String.fromCharCode(fromStr.charCodeAt(i) + 1), String.fromCharCode(toStr.charCodeAt(i) - 1)); + } + out.push(" "); + moreDigits(subLen, subLen); + } + } + if (!toReached) { + out.push(" | "); + digitRange(toStr[i], toStr[i]); + out.push(" "); + uniformRange(subZeros, toSub); + } + out.push(")"); + } else { + out.push("["); + out.push(fromStr[i]); + out.push("-"); + out.push(toStr[i]); + out.push("]"); + } + } + } + + if (hasMin && hasMax) { + if (minValue < 0 && maxValue < 0) { + out.push("\"-\" ("); + _generateMinMaxInt(-maxValue, -minValue, out, decimalsLeft, true); + out.push(")"); + return; + } + + if (minValue < 0) { + out.push("\"-\" ("); + _generateMinMaxInt(0, -minValue, out, decimalsLeft, true); + out.push(") | "); + minValue = 0; + } + + let minS = minValue.toString(); + const maxS = maxValue.toString(); + const minDigits = minS.length; + const maxDigits = maxS.length; + + for (let digits = minDigits; digits < maxDigits; digits++) { + uniformRange(minS, "9".repeat(digits)); + minS = "1" + "0".repeat(digits); + out.push(" | "); + } + uniformRange(minS, maxS); + return; + } + + const lessDecimals = Math.max(decimalsLeft - 1, 1); + + if (hasMin) { + if (minValue < 0) { + out.push("\"-\" ("); + _generateMinMaxInt(null, -minValue, out, decimalsLeft, false); + out.push(") | [0] | [1-9] "); + moreDigits(0, decimalsLeft - 1); + } else if (minValue === 0) { + if (topLevel) { + out.push("[0] | [1-9] "); + moreDigits(0, lessDecimals); + } else { + moreDigits(1, decimalsLeft); + } + } else if (minValue <= 9) { + const c = minValue.toString(); + const range_start = topLevel ? 
'1' : '0'; + if (c > range_start) { + digitRange(range_start, String.fromCharCode(c.charCodeAt(0) - 1)); + out.push(" "); + moreDigits(1, lessDecimals); + out.push(" | "); + } + digitRange(c, "9"); + out.push(" "); + moreDigits(0, lessDecimals); + } else { + const minS = minValue.toString(); + const length = minS.length; + const c = minS[0]; + + if (c > "1") { + digitRange(topLevel ? "1" : "0", String.fromCharCode(c.charCodeAt(0) - 1)); + out.push(" "); + moreDigits(length, lessDecimals); + out.push(" | "); + } + digitRange(c, c); + out.push(" ("); + _generateMinMaxInt(parseInt(minS.slice(1)), null, out, lessDecimals, false); + out.push(")"); + if (c < "9") { + out.push(" | "); + digitRange(String.fromCharCode(c.charCodeAt(0) + 1), "9"); + out.push(" "); + moreDigits(length - 1, lessDecimals); + } + } + return; + } + + if (hasMax) { + if (maxValue >= 0) { + if (topLevel) { + out.push("\"-\" [1-9] "); + moreDigits(0, lessDecimals); + out.push(" | "); + } + _generateMinMaxInt(0, maxValue, out, decimalsLeft, true); + } else { + out.push("\"-\" ("); + _generateMinMaxInt(-maxValue, null, out, decimalsLeft, false); + out.push(")"); + } + return; + } + + throw new Error("At least one of minValue or maxValue must be set"); +} + class BuiltinRule { constructor(content, deps) { this.content = content; @@ -435,6 +630,24 @@ export class SchemaConverter { const minLen = schema.minLength || 0; const maxLen = schema.maxLength; return this._addRule(ruleName, '"\\\"" ' + _buildRepetition(charRuleName, minLen, maxLen) + ' "\\\"" space'); + } else if (schemaType === 'integer' && ('minimum' in schema || 'exclusiveMinimum' in schema || 'maximum' in schema || 'exclusiveMaximum' in schema)) { + let minValue = null; + let maxValue = null; + if ('minimum' in schema) { + minValue = schema.minimum; + } else if ('exclusiveMinimum' in schema) { + minValue = schema.exclusiveMinimum + 1; + } + if ('maximum' in schema) { + maxValue = schema.maximum; + } else if ('exclusiveMaximum' in schema) { + maxValue = schema.exclusiveMaximum - 1; + } + + const out = ["("]; + _generateMinMaxInt(minValue, maxValue, out); + out.push(") space"); + return this._addRule(ruleName, out.join('')); } else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) { return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object'])); } else { diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 96f90c01e0d97..5b3992236c26c 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -148,6 +148,250 @@ static void test_schema(const std::string & test_desc, const std::string & schem } static void test_simple_grammar() { + test_schema( + "min 0", + R"""({ + "type": "integer", + "minimum": 0 + })""", + // Passing strings + { + "0", + "10", + "12", + "10000", + }, + // Failing strings + { + "-1", + "-10", + "-10000", + "-100000000000000000000000000000000", + "100000000000000000000000000000000", + "00", + "01", + "-0", + } + ); + test_schema( + "min 2", + // Schema + R"""({ + "type": "integer", + "minimum": 2 + })""", + // Passing strings + { + "2", + "3", + "4", + "10", + "20", + "1234567890000000", + }, + // Failing strings + { + "0", + "1", + "-1", + "-100", + "0", + "1", + "01", + "02", + "12345678900000000", + } + ); + test_schema( + "min 456", + R"""({ + "type": "integer", + "minimum": 456 + })""", + // Passing strings + { + "456", + "4560", + "457", + "460", + "500", + }, + // Failing strings + { + "455", + "356", + "50", + "050", + "-1", + 
"-456", + } + ); + test_schema( + "min -123", + R"""({ + "type": "integer", + "minimum": -123 + })""", + // Passing strings + { + "-123", + "-122", + "-11", + "-1", + "0", + "1", + "123", + "1234", + "2345", + }, + // Failing strings + { + "-1234", + "-124", + } + ); + + test_schema( + "max 9999", + // Schema + R"""({ + "type": "integer", + "maximum": 9999 + })""", + // Passing strings + { + "-99999", + "0", + "9999", + }, + // Failing strings + { + "10000", + "99991", + } + ); + test_schema( + "max -9999", + // Schema + R"""({ + "type": "integer", + "maximum": -9999 + })""", + // Passing strings + { + "-10000", + "-9999", + }, + // Failing strings + { + "-9998", + "0", + "9999", + } + ); + test_schema( + "min 5 max 30", + // Schema + R"""({ + "type": "integer", + "minimum": 5, + "maximum": 30 + })""", + // Passing strings + { + "5", + "10", + "30", + }, + // Failing strings + { + "05", + "4", + "-1", + "31", + "123", + "0123", + } + ); + test_schema( + "min -1 max 1", + R"""({ + "type": "integer", + "minimum": -1, + "maximum": 1 + })""", + // Passing strings + { + "-1", + "0", + "1", + }, + // Failing strings + { + "-11", + "-10", + "-2", + "2", + "10", + "11", + } + ); + test_schema( + "min -123 max 42", + R"""({ + "type": "integer", + "minimum": -123, + "maximum": 42 + })""", + // Passing strings + { + "-123", + "-122", + "-13", + "-11", + "-2", + "-1", + "0", + "1", + "5", + "10", + "39", + "40", + "42", + }, + // Failing strings + { + "-0123", + "-124", + "-1123", + "-200", + "43", + "123", + "0123", + } + ); + test_schema( + "exclusive min / max", + // Schema + R"""({ + "type": "integer", + "exclusiveMinimum": 0, + "exclusiveMaximum": 10000 + })""", + // Passing strings + { + "1", + "9999", + }, + // Failing strings + { + "0", + "01", + "10000", + "99999", + } + ); + // Test case for a simple grammar test_grammar( "simple grammar", @@ -773,7 +1017,6 @@ static void test_json_schema() { } ); - test_schema( "min+max items", // Schema diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index 87bc66b691784..2e591bd71abaa 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -80,6 +80,232 @@ static void test_all(const std::string & lang, std::function Date: Tue, 25 Jun 2024 21:07:28 +0200 Subject: [PATCH 11/15] llama : return nullptr from llama_grammar_init (#8093) * llama : return nullptr from llama_grammar_init This commit updates llama_grammar_init to return nullptr instead of throwing an exception. The motivation for this is that this function is declared inside an extern "C" block and is intended/may be used from C code which will not be able to handle exceptions thrown, and results in undefined behavior. On Windows and using MSVC the following warning is currently generated: ```console C:\llama.cpp\llama.cpp(13998,1): warning C4297: 'llama_grammar_init': function assumed not to throw an exception but does C:\llama.cpp\llama.cpp(13998,1): message : __declspec(nothrow), throw(), noexcept(true), or noexcept was specified on the function ``` Signed-off-by: Daniel Bevenius * squash! llama : return nullptr from llama_grammar_init Add checks for nullptr when calling llama_grammar_init. 
Signed-off-by: Daniel Bevenius --------- Signed-off-by: Daniel Bevenius Co-authored-by: Clint Herron --- common/sampling.cpp | 12 ++++++++++-- examples/gbnf-validator/gbnf-validator.cpp | 4 +++- llama.cpp | 3 ++- llama.h | 6 ++++++ tests/test-grammar-integration.cpp | 6 +++--- tests/test-llama-grammar.cpp | 4 ++++ 6 files changed, 28 insertions(+), 7 deletions(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index f1f80351637f0..9f332fe573683 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -28,9 +28,13 @@ struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_ std::vector grammar_rules(result->parsed_grammar.c_rules()); - result->grammar = llama_grammar_init( + struct llama_grammar * grammar = llama_grammar_init( grammar_rules.data(), grammar_rules.size(), result->parsed_grammar.symbol_ids.at("root")); + if (grammar == nullptr) { + throw std::runtime_error("Failed to initialize llama_grammar"); + } + result->grammar = grammar; } result->prev.resize(params.n_prev); @@ -59,9 +63,13 @@ void llama_sampling_reset(llama_sampling_context * ctx) { if (!ctx->parsed_grammar.rules.empty()) { std::vector grammar_rules(ctx->parsed_grammar.c_rules()); - ctx->grammar = llama_grammar_init( + struct llama_grammar * grammar = llama_grammar_init( grammar_rules.data(), grammar_rules.size(), ctx->parsed_grammar.symbol_ids.at("root")); + if (grammar == nullptr) { + throw std::runtime_error("Failed to initialize llama_grammar"); + } + ctx->grammar = grammar; } std::fill(ctx->prev.begin(), ctx->prev.end(), 0); diff --git a/examples/gbnf-validator/gbnf-validator.cpp b/examples/gbnf-validator/gbnf-validator.cpp index 0406dc3398b8a..dd53ba9b1d551 100644 --- a/examples/gbnf-validator/gbnf-validator.cpp +++ b/examples/gbnf-validator/gbnf-validator.cpp @@ -101,7 +101,9 @@ int main(int argc, char** argv) { auto grammar = llama_grammar_init( grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root")); - + if (grammar == nullptr) { + throw std::runtime_error("Failed to initialize llama_grammar"); + } // Read the input file std::string input_str; { diff --git a/llama.cpp b/llama.cpp index 33e6cb7229aab..dd2823e65c4b7 100644 --- a/llama.cpp +++ b/llama.cpp @@ -14500,7 +14500,8 @@ struct llama_grammar * llama_grammar_init( continue; } if (llama_grammar_detect_left_recursion(vec_rules, i, &rules_visited, &rules_in_progress, &rules_may_be_empty)) { - throw std::runtime_error(format("unsupported grammar, left recursion detected for nonterminal at index %zu", i)); + LLAMA_LOG_ERROR("unsupported grammar, left recursion detected for nonterminal at index %zu", i); + return nullptr; } } diff --git a/llama.h b/llama.h index 53e06d9db5273..82d15747f4662 100644 --- a/llama.h +++ b/llama.h @@ -924,6 +924,12 @@ extern "C" { // Grammar // + /// Initialize a llama_grammar. + /// + /// @param rules The rule elements of the grammar to initialize. + /// @param n_rules The number of rules. + /// @param start_rule_index The index of the root rule (the starting point of the grammar). + /// @return The initialized llama_grammar or nullptr if initialization failed. 
LLAMA_API struct llama_grammar * llama_grammar_init( const llama_grammar_element ** rules, size_t n_rules, diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 5b3992236c26c..5750d362a7247 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -36,10 +36,10 @@ static llama_grammar* build_grammar(const std::string & grammar_str) { static bool test_build_grammar_fails(const std::string & grammar_str) { fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str()); bool grammar_fails = false; - try { - build_grammar(grammar_str); + llama_grammar * grammar = build_grammar(grammar_str); + if (grammar != nullptr) { fprintf(stderr, " ❌ Expected build failure, but succeeded\n"); - } catch (const std::exception & err) { + } else { grammar_fails = true; fprintf(stdout, " ✅︎\n"); } diff --git a/tests/test-llama-grammar.cpp b/tests/test-llama-grammar.cpp index 27ca4d2656c5d..c8badb2063076 100644 --- a/tests/test-llama-grammar.cpp +++ b/tests/test-llama-grammar.cpp @@ -116,6 +116,10 @@ int main() std::vector grammar_rules(parsed_grammar.c_rules()); grammar = llama_grammar_init( grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root")); + if (grammar == nullptr) + { + throw std::runtime_error("Failed to initialize llama_grammar"); + } std::vector> expected_stacks = { { From 6fcbf6823553efabe52ed83e3c2a3329aa3387d1 Mon Sep 17 00:00:00 2001 From: fairydreaming <166155368+fairydreaming@users.noreply.github.com> Date: Tue, 25 Jun 2024 21:14:35 +0200 Subject: [PATCH 12/15] llama : implement Unigram tokenizer needed by T5 and FLAN-T5 model families (#5763) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * llama : add T5 model architecture, tensors and model header parameters * llama : add implementation of Unigram tokenizer with SentencePiece-like text normalization using precompiled charsmap --------- Co-authored-by: Stanisław Szymczyk --- llama.cpp | 621 ++++++++++++++++++++++++++++++++++++++++++++++++---- llama.h | 2 + unicode.cpp | 2 +- unicode.h | 1 + 4 files changed, 587 insertions(+), 39 deletions(-) diff --git a/llama.cpp b/llama.cpp index dd2823e65c4b7..78a21008f9338 100644 --- a/llama.cpp +++ b/llama.cpp @@ -226,6 +226,7 @@ enum llm_arch { LLM_ARCH_ARCTIC, LLM_ARCH_DEEPSEEK2, LLM_ARCH_BITNET, + LLM_ARCH_T5, LLM_ARCH_UNKNOWN, }; @@ -265,6 +266,7 @@ static const std::map LLM_ARCH_NAMES = { { LLM_ARCH_ARCTIC, "arctic" }, { LLM_ARCH_DEEPSEEK2, "deepseek2" }, { LLM_ARCH_BITNET, "bitnet" }, + { LLM_ARCH_T5, "t5" }, { LLM_ARCH_UNKNOWN, "(unknown)" }, }; @@ -297,6 +299,7 @@ enum llm_kv { LLM_KV_EXPERT_WEIGHTS_SCALE, LLM_KV_POOLING_TYPE, LLM_KV_LOGIT_SCALE, + LLM_KV_DECODER_START_TOKEN_ID, LLM_KV_ATTENTION_HEAD_COUNT, LLM_KV_ATTENTION_HEAD_COUNT_KV, @@ -309,6 +312,7 @@ enum llm_kv { LLM_KV_ATTENTION_CAUSAL, LLM_KV_ATTENTION_Q_LORA_RANK, LLM_KV_ATTENTION_KV_LORA_RANK, + LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, LLM_KV_ROPE_DIMENSION_COUNT, LLM_KV_ROPE_FREQ_BASE, @@ -346,6 +350,8 @@ enum llm_kv { LLM_KV_TOKENIZER_ADD_BOS, LLM_KV_TOKENIZER_ADD_EOS, LLM_KV_TOKENIZER_ADD_PREFIX, + LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, + LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, LLM_KV_TOKENIZER_HF_JSON, LLM_KV_TOKENIZER_RWKV, LLM_KV_TOKENIZER_PREFIX_ID, @@ -383,18 +389,20 @@ static const std::map LLM_KV_NAMES = { { LLM_KV_EXPERT_WEIGHTS_SCALE, "%s.expert_weights_scale" }, { LLM_KV_POOLING_TYPE , "%s.pooling_type" }, { LLM_KV_LOGIT_SCALE, "%s.logit_scale" }, - - { 
LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" }, - { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" }, - { LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" }, - { LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" }, - { LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" }, - { LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" }, - { LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" }, - { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" }, - { LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" }, - { LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" }, - { LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" }, + { LLM_KV_DECODER_START_TOKEN_ID, "%s.decoder_start_token_id" }, + + { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" }, + { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" }, + { LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" }, + { LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" }, + { LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" }, + { LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" }, + { LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" }, + { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" }, + { LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" }, + { LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" }, + { LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" }, + { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" }, { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" }, { LLM_KV_ROPE_FREQ_BASE, "%s.rope.freq_base" }, @@ -415,29 +423,31 @@ static const std::map LLM_KV_NAMES = { { LLM_KV_SSM_STATE_SIZE, "%s.ssm.state_size" }, { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" }, - { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" }, - { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" }, - { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" }, - { LLM_KV_TOKENIZER_TOKEN_TYPE, "tokenizer.ggml.token_type" }, - { LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, "tokenizer.ggml.token_type_count" }, - { LLM_KV_TOKENIZER_SCORES, "tokenizer.ggml.scores" }, - { LLM_KV_TOKENIZER_MERGES, "tokenizer.ggml.merges" }, - { LLM_KV_TOKENIZER_BOS_ID, "tokenizer.ggml.bos_token_id" }, - { LLM_KV_TOKENIZER_EOS_ID, "tokenizer.ggml.eos_token_id" }, - { LLM_KV_TOKENIZER_UNK_ID, "tokenizer.ggml.unknown_token_id" }, - { LLM_KV_TOKENIZER_SEP_ID, "tokenizer.ggml.seperator_token_id" }, - { LLM_KV_TOKENIZER_PAD_ID, "tokenizer.ggml.padding_token_id" }, - { LLM_KV_TOKENIZER_CLS_ID, "tokenizer.ggml.cls_token_id" }, - { LLM_KV_TOKENIZER_MASK_ID, "tokenizer.ggml.mask_token_id" }, - { LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" }, - { LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" }, - { LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" }, - { LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" }, - { LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" }, - { LLM_KV_TOKENIZER_PREFIX_ID, "tokenizer.ggml.prefix_token_id" }, - { LLM_KV_TOKENIZER_SUFFIX_ID, "tokenizer.ggml.suffix_token_id" }, - { LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" }, - { LLM_KV_TOKENIZER_EOT_ID, "tokenizer.ggml.eot_token_id" }, + { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" }, + { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" }, + { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" }, + { LLM_KV_TOKENIZER_TOKEN_TYPE, "tokenizer.ggml.token_type" }, + { 
LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, "tokenizer.ggml.token_type_count" }, + { LLM_KV_TOKENIZER_SCORES, "tokenizer.ggml.scores" }, + { LLM_KV_TOKENIZER_MERGES, "tokenizer.ggml.merges" }, + { LLM_KV_TOKENIZER_BOS_ID, "tokenizer.ggml.bos_token_id" }, + { LLM_KV_TOKENIZER_EOS_ID, "tokenizer.ggml.eos_token_id" }, + { LLM_KV_TOKENIZER_UNK_ID, "tokenizer.ggml.unknown_token_id" }, + { LLM_KV_TOKENIZER_SEP_ID, "tokenizer.ggml.seperator_token_id" }, + { LLM_KV_TOKENIZER_PAD_ID, "tokenizer.ggml.padding_token_id" }, + { LLM_KV_TOKENIZER_CLS_ID, "tokenizer.ggml.cls_token_id" }, + { LLM_KV_TOKENIZER_MASK_ID, "tokenizer.ggml.mask_token_id" }, + { LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" }, + { LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" }, + { LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" }, + { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, "tokenizer.ggml.remove_extra_whitespaces" }, + { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" }, + { LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" }, + { LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" }, + { LLM_KV_TOKENIZER_PREFIX_ID, "tokenizer.ggml.prefix_token_id" }, + { LLM_KV_TOKENIZER_SUFFIX_ID, "tokenizer.ggml.suffix_token_id" }, + { LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" }, + { LLM_KV_TOKENIZER_EOT_ID, "tokenizer.ggml.eot_token_id" }, }; struct LLM_KV { @@ -504,6 +514,34 @@ enum llm_tensor { LLM_TENSOR_ATTN_KV_A_NORM, LLM_TENSOR_ATTN_SUB_NORM, LLM_TENSOR_FFN_SUB_NORM, + LLM_TENSOR_DEC_ATTN_NORM, + LLM_TENSOR_DEC_ATTN_Q, + LLM_TENSOR_DEC_ATTN_K, + LLM_TENSOR_DEC_ATTN_V, + LLM_TENSOR_DEC_ATTN_OUT, + LLM_TENSOR_DEC_ATTN_REL_B, + LLM_TENSOR_DEC_CROSS_ATTN_NORM, + LLM_TENSOR_DEC_CROSS_ATTN_Q, + LLM_TENSOR_DEC_CROSS_ATTN_K, + LLM_TENSOR_DEC_CROSS_ATTN_V, + LLM_TENSOR_DEC_CROSS_ATTN_OUT, + LLM_TENSOR_DEC_CROSS_ATTN_REL_B, + LLM_TENSOR_DEC_FFN_NORM, + LLM_TENSOR_DEC_FFN_GATE, + LLM_TENSOR_DEC_FFN_DOWN, + LLM_TENSOR_DEC_FFN_UP, + LLM_TENSOR_DEC_OUTPUT_NORM, + LLM_TENSOR_ENC_ATTN_NORM, + LLM_TENSOR_ENC_ATTN_Q, + LLM_TENSOR_ENC_ATTN_K, + LLM_TENSOR_ENC_ATTN_V, + LLM_TENSOR_ENC_ATTN_OUT, + LLM_TENSOR_ENC_ATTN_REL_B, + LLM_TENSOR_ENC_FFN_NORM, + LLM_TENSOR_ENC_FFN_GATE, + LLM_TENSOR_ENC_FFN_DOWN, + LLM_TENSOR_ENC_FFN_UP, + LLM_TENSOR_ENC_OUTPUT_NORM, }; static const std::map> LLM_TENSOR_NAMES = { @@ -1135,6 +1173,41 @@ static const std::map> LLM_TENSOR_NA { LLM_TENSOR_FFN_SUB_NORM, "blk.%d.ffn_sub_norm" }, }, }, + { + LLM_ARCH_T5, + { + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { LLM_TENSOR_OUTPUT, "output" }, + { LLM_TENSOR_DEC_OUTPUT_NORM, "dec.output_norm" }, + { LLM_TENSOR_DEC_ATTN_NORM, "dec.blk.%d.attn_norm" }, + { LLM_TENSOR_DEC_ATTN_Q, "dec.blk.%d.attn_q" }, + { LLM_TENSOR_DEC_ATTN_K, "dec.blk.%d.attn_k" }, + { LLM_TENSOR_DEC_ATTN_V, "dec.blk.%d.attn_v" }, + { LLM_TENSOR_DEC_ATTN_OUT, "dec.blk.%d.attn_o" }, + { LLM_TENSOR_DEC_ATTN_REL_B, "dec.blk.%d.attn_rel_b" }, + { LLM_TENSOR_DEC_CROSS_ATTN_NORM, "dec.blk.%d.cross_attn_norm" }, + { LLM_TENSOR_DEC_CROSS_ATTN_Q, "dec.blk.%d.cross_attn_q" }, + { LLM_TENSOR_DEC_CROSS_ATTN_K, "dec.blk.%d.cross_attn_k" }, + { LLM_TENSOR_DEC_CROSS_ATTN_V, "dec.blk.%d.cross_attn_v" }, + { LLM_TENSOR_DEC_CROSS_ATTN_OUT, "dec.blk.%d.cross_attn_o" }, + { LLM_TENSOR_DEC_CROSS_ATTN_REL_B, "dec.blk.%d.cross_attn_rel_b" }, + { LLM_TENSOR_DEC_FFN_NORM, "dec.blk.%d.ffn_norm" }, + { LLM_TENSOR_DEC_FFN_GATE, "dec.blk.%d.ffn_gate" }, + { LLM_TENSOR_DEC_FFN_DOWN, "dec.blk.%d.ffn_down" }, + { LLM_TENSOR_DEC_FFN_UP, "dec.blk.%d.ffn_up" }, + { 
LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" }, + { LLM_TENSOR_ENC_ATTN_NORM, "enc.blk.%d.attn_norm" }, + { LLM_TENSOR_ENC_ATTN_Q, "enc.blk.%d.attn_q" }, + { LLM_TENSOR_ENC_ATTN_K, "enc.blk.%d.attn_k" }, + { LLM_TENSOR_ENC_ATTN_V, "enc.blk.%d.attn_v" }, + { LLM_TENSOR_ENC_ATTN_OUT, "enc.blk.%d.attn_o" }, + { LLM_TENSOR_ENC_ATTN_REL_B, "enc.blk.%d.attn_rel_b" }, + { LLM_TENSOR_ENC_FFN_NORM, "enc.blk.%d.ffn_norm" }, + { LLM_TENSOR_ENC_FFN_GATE, "enc.blk.%d.ffn_gate" }, + { LLM_TENSOR_ENC_FFN_DOWN, "enc.blk.%d.ffn_down" }, + { LLM_TENSOR_ENC_FFN_UP, "enc.blk.%d.ffn_up" }, + }, + }, { LLM_ARCH_UNKNOWN, { @@ -2356,6 +2429,11 @@ struct llama_vocab { bool tokenizer_add_bos = false; bool tokenizer_add_eos = false; bool tokenizer_ignore_merges = false; + bool tokenizer_remove_extra_whitespaces = false; + bool tokenizer_escape_whitespaces = true; + bool tokenizer_treat_whitespace_as_suffix = false; + + std::vector precompiled_charsmap; int find_bpe_rank(const std::string & token_left, const std::string & token_right) const { GGML_ASSERT(token_left.find(' ') == std::string::npos); @@ -4191,6 +4269,7 @@ static const char * llama_model_vocab_type_name(enum llama_vocab_type type){ case LLAMA_VOCAB_TYPE_SPM: return "SPM"; case LLAMA_VOCAB_TYPE_BPE: return "BPE"; case LLAMA_VOCAB_TYPE_WPM: return "WPM"; + case LLAMA_VOCAB_TYPE_UGM: return "UGM"; default: return "unknown"; } } @@ -4870,6 +4949,45 @@ static void llm_load_vocab( vocab.special_pad_id = -1; vocab.special_cls_id = -1; vocab.special_mask_id = -1; + } else if (tokenizer_model == "t5") { + vocab.type = LLAMA_VOCAB_TYPE_UGM; + + // default special tokens + vocab.special_bos_id = -1; + vocab.special_eos_id = 1; + vocab.special_unk_id = 2; + vocab.special_sep_id = -1; + vocab.special_pad_id = 0; + vocab.special_cls_id = -1; + vocab.special_mask_id = -1; + + const int add_space_prefix_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_ADD_PREFIX).c_str()); + if (add_space_prefix_keyidx != -1) { + vocab.tokenizer_add_space_prefix = gguf_get_val_bool(ctx, add_space_prefix_keyidx); + } // The default value of add_space_prefix is true. + + const int remove_extra_whitespaces_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_REMOVE_EXTRA_WS).c_str()); + if (remove_extra_whitespaces_keyidx != -1) { + vocab.tokenizer_remove_extra_whitespaces = gguf_get_val_bool(ctx, remove_extra_whitespaces_keyidx); + } // The default value of remove_extra_whitespaces is false. 
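
As a rough, self-contained sketch of the Viterbi-style unigram search that the llm_tokenizer_ugm added further below performs (the toy vocabulary, scores and variable names here are illustrative only and are not part of this patch), the core dynamic program can be pictured like this:

    // Minimal sketch (illustrative only): Viterbi-style unigram tokenization.
    // The real tokenizer in this patch walks a token trie per UTF-8 code point
    // instead of probing every substring, and falls back to a penalized
    // unknown token when nothing in the vocabulary matches.
    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
        // toy vocabulary: token -> log-probability-like score (higher is better)
        std::map<std::string, float> vocab = {
            {"a", -1.0f}, {"b", -1.0f}, {"c", -1.0f}, {"ab", -1.5f}, {"abc", -4.0f},
        };
        std::string text = "abc";
        size_t n = text.size();

        std::vector<float>  best(n + 1, -1e30f); // best score for a tokenization of text[0..i)
        std::vector<size_t> prev(n + 1, 0);      // start offset of the last token on that best path
        best[0] = 0.0f;

        for (size_t i = 0; i < n; ++i) {
            if (best[i] <= -1e30f) continue; // position not reachable
            for (size_t len = 1; i + len <= n; ++len) {
                auto it = vocab.find(text.substr(i, len));
                if (it == vocab.end()) continue;
                float score = best[i] + it->second;
                if (score > best[i + len]) {
                    best[i + len] = score;
                    prev[i + len] = i;
                }
            }
        }

        // backtrack from the end to recover the best segmentation: "ab" + "c"
        std::vector<std::string> tokens;
        for (size_t i = n; i > 0; i = prev[i]) {
            tokens.push_back(text.substr(prev[i], i - prev[i]));
        }
        for (auto it = tokens.rbegin(); it != tokens.rend(); ++it) {
            printf("%s\n", it->c_str());
        }
        return 0;
    }

With these toy scores the segmentation "ab" + "c" (score -2.5) beats "a" + "b" + "c" (-3.0) and "abc" (-4.0), which is the kind of choice the backtracking step in the patch below makes over real vocabulary scores.
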
+ + const int precompiled_charsmap_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP).c_str()); + if (precompiled_charsmap_keyidx != -1) { + size_t n_precompiled_charsmap = gguf_get_arr_n(ctx, precompiled_charsmap_keyidx); + const char * precompiled_charsmap = (const char *) gguf_get_arr_data(ctx, precompiled_charsmap_keyidx); + vocab.precompiled_charsmap.assign(precompiled_charsmap, precompiled_charsmap + n_precompiled_charsmap); +#ifdef IS_BIG_ENDIAN + // correct endiannes of data in precompiled_charsmap binary blob + uint32_t * xcda_blob_size = (uint32_t *) &vocab.precompiled_charsmap[0]; + *xcda_blob_size = __builtin_bswap32(*xcda_blob_size); + assert(*xcda_blob_size + sizeof(uint32_t) < n_precompiled_charsmap); + size_t xcda_array_size = *xcda_blob_size / sizeof(uint32_t); + uint32_t * xcda_array = (uint32_t *) &vocab.precompiled_charsmap[sizeof(uint32_t)]; + for (size_t i = 0; i < xcda_array_size; ++i) { + xcda_array[i] = __builtin_bswap32(xcda_array[i]); + } +#endif + } } else { throw std::runtime_error(format("unknown tokenizer: '%s'", tokenizer_model.c_str())); } @@ -4952,6 +5070,10 @@ static void llm_load_vocab( vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT; vocab.tokenizer_add_bos = true; vocab.tokenizer_add_eos = false; + } else if (vocab.type == LLAMA_VOCAB_TYPE_UGM) { + vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT; + vocab.tokenizer_add_bos = false; + vocab.tokenizer_add_eos = true; } else { vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT; } @@ -13213,12 +13335,18 @@ static bool llama_is_user_defined_token(const llama_vocab& vocab, llama_token id return vocab.id_to_token[id].attr & LLAMA_TOKEN_ATTR_USER_DEFINED; } +static bool llama_is_unused_token(const llama_vocab& vocab, llama_token id) { + GGML_ASSERT(vocab.type != LLAMA_VOCAB_TYPE_NONE); + return vocab.id_to_token[id].attr & LLAMA_TOKEN_ATTR_UNUSED; +} + static uint8_t llama_token_to_byte(const llama_vocab& vocab, llama_token id) { GGML_ASSERT(llama_vocab_get_type(vocab) != LLAMA_VOCAB_TYPE_NONE); GGML_ASSERT(llama_is_byte_token(vocab, id)); const auto & token_data = vocab.id_to_token.at(id); switch (llama_vocab_get_type(vocab)) { - case LLAMA_VOCAB_TYPE_SPM: { + case LLAMA_VOCAB_TYPE_SPM: + case LLAMA_VOCAB_TYPE_UGM: { auto buf = token_data.text.substr(3, 2); return strtol(buf.c_str(), NULL, 16); } @@ -13238,7 +13366,8 @@ static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) { GGML_ASSERT(llama_vocab_get_type(vocab) != LLAMA_VOCAB_TYPE_NONE); static const char * hex = "0123456789ABCDEF"; switch (llama_vocab_get_type(vocab)) { - case LLAMA_VOCAB_TYPE_SPM: { + case LLAMA_VOCAB_TYPE_SPM: + case LLAMA_VOCAB_TYPE_UGM: { const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 }; auto token = vocab.token_to_id.find(buf); if (token != vocab.token_to_id.end()) { @@ -13826,6 +13955,383 @@ struct llm_tokenizer_wpm { const llama_vocab & vocab; }; +struct naive_trie { + naive_trie() : has_value(false), value(0) { + } + void insert(const char * key, size_t len, int32_t value = 0) { + if (len == 0) { + this->has_value = true; + this->value = value; + return; + } + char c = key[0]; + auto res = children.find(c); + if (res != children.end()) { + res->second.insert(key + 1, len - 1, value); + } else { + auto res = children.insert(std::make_pair(c, naive_trie())); + res.first->second.insert(key + 1, len - 1, value); + } + } + std::pair get_longest_prefix(const char * key, size_t len, size_t offset = 0) { + if (len == 0 || offset == len) { + return std::make_pair(key, offset); + } + 
char c = key[offset]; + auto res = children.find(c); + if (res != children.end()) { + return res->second.get_longest_prefix(key, len, offset + 1); + } else { + return std::make_pair(key, offset); + } + } + struct naive_trie * traverse(const char c) { + auto res = children.find(c); + if (res != children.end()) { + return &res->second; + } else { + return NULL; + } + } + std::map children; + bool has_value; + llama_token value; +}; + +struct llm_tokenizer_ugm { + llm_tokenizer_ugm(const llama_vocab & vocab) : vocab(vocab) { + if (vocab.precompiled_charsmap.size() > 0) { + size_t charsmap_offset = 0; + + // First four bytes of precompiled_charsmap contains length of binary + // blob containing XOR-compressed compact double array (XCDA) entries + uint32_t xcda_blob_size = *(const uint32_t *) &vocab.precompiled_charsmap[0]; + charsmap_offset += sizeof(xcda_blob_size); + if (xcda_blob_size + charsmap_offset >= vocab.precompiled_charsmap.size()) { + throw std::runtime_error("Index out of array bounds in precompiled charsmap!"); + } + + // Next xcda_blob_size bytes contain entries of XOR-compressed compact + // double array (XCDA). Each entry is bit-packed into a 32-bit integer. + xcda_array = (const uint32_t *) &vocab.precompiled_charsmap[charsmap_offset]; + xcda_array_size = xcda_blob_size / sizeof(uint32_t); + charsmap_offset += xcda_blob_size; + + // Remaining bytes of precompiled charsmap contain null-terminated + // replacement strings for prefixes matched by the XCDA. + prefix_replacements = &vocab.precompiled_charsmap[charsmap_offset]; + prefix_replacements_size = vocab.precompiled_charsmap.size() - charsmap_offset; + } + + for (unsigned int id = 0; id < vocab.id_to_token.size(); ++id) { + const auto &token_data = vocab.id_to_token[id]; + + if (llama_is_normal_token(vocab, id)) { + min_score = std::min(min_score, token_data.score); + max_score = std::max(max_score, token_data.score); + } + + if (llama_is_normal_token(vocab, id) || + llama_is_user_defined_token(vocab, id) || + llama_is_unused_token(vocab, id)) { + token_matcher.insert(token_data.text.data(), token_data.text.size(), id); + } + + if (llama_is_user_defined_token(vocab, id)) { + user_defined_token_matcher.insert(token_data.text.data(), token_data.text.size()); + } + } + + unknown_token_score = min_score - unknown_token_score_penalty; + } + + /* This implementation is based on SentencePiece optimized Viterbi algorithm for + * unigram language models. The general idea is to: + * - move along the input sequence in steps of one UTF code point, + * - at each step find all possible tokenizations of the prefix by + * traversing the tokens trie, + * - for each tokenization store the best one so far (by higher score) + * - use the position in sequence after given token as an index to store + * results + * - if there was no valid tokenization of the current UTF code point + * then use unknown token with additional score penalty + * After processing the whole sequence we backtrack from the end to get + * the best tokenization. 
+ */ + void tokenize(const std::string & text, std::vector & output) { + // normalize the input first + std::string normalized; + normalize(text, &normalized); + size_t input_len = normalized.size(); + + // initialize score_sum to -FLT_MAX so it will be always lower than sums of token scores + std::vector tokenization_results(input_len + 1, {0, 0, -FLT_MAX}); + // at the beginning tokenization score is zero + tokenization_results[0] = { 0, 0, 0 }; + + for (size_t input_offset = 0; input_offset < input_len;) { + size_t prefix_offset = input_offset; + // calculate how many code units are in the currently processed UTF code point + size_t n_utf8_code_units = std::min(utf8_len(normalized[input_offset]), input_len - input_offset); + + // traverse the token matcher trie to find a matching token + bool single_codepoint_token_found = false; + const struct best_tokenization & current_best = tokenization_results[input_offset]; + struct naive_trie * node = token_matcher.traverse(normalized[prefix_offset++]); + + while (prefix_offset <= input_len && node != NULL) { + // check if we found valid token in prefix + if (node->has_value) { + // check if it corresponds to the whole UTF code point + if (prefix_offset - input_offset == n_utf8_code_units) { + single_codepoint_token_found = true; + } + llama_token token_id = node->value; + const auto &token_data = vocab.id_to_token[token_id]; + + // we set the user-defined token scores to 0 to make them more likely to be selected + // (normal token scores are log probabilities, so they are negative) + // score type is double here to make tokenization results exactly + // the same as in the HF tokenizer using SentencePiece + const double token_score = llama_is_user_defined_token(vocab, token_id) ? 0.0 : token_data.score; + const double challenger_score = current_best.score_sum + token_score; + struct best_tokenization & current_champ = tokenization_results[prefix_offset]; + if (challenger_score > current_champ.score_sum) { + struct best_tokenization challenger = { token_id, input_offset, (float) challenger_score }; + current_champ = challenger; + } + } + node = node->traverse(normalized[prefix_offset++]); + } + + // if we didn't find a valid token corresponding to the whole UTF code point + // then use unknown token as the tokenization of this UTF code point + if (!single_codepoint_token_found) { + const double challenger_score = current_best.score_sum + unknown_token_score; + prefix_offset = input_offset + n_utf8_code_units; + struct best_tokenization & current_champ = tokenization_results[prefix_offset]; + if (challenger_score > current_champ.score_sum) { + struct best_tokenization challenger = { vocab.special_unk_id, input_offset, (float) challenger_score }; + current_champ = challenger; + } + } + + // move to the next UTF code point + input_offset += n_utf8_code_units; + } + + // now backtrack from the end to gather token ids of the best tokenization + // merge sequences of consecutive unknown tokens into single unknown tokens + bool is_prev_unknown = false; + for (struct best_tokenization & tokenization = tokenization_results[input_len]; ; tokenization = tokenization_results[tokenization.input_offset]) { + bool is_unknown = tokenization.token_id == vocab.special_unk_id; + if (!(is_prev_unknown && is_unknown)) { + output.push_back(tokenization.token_id); + } + if (tokenization.input_offset == 0) { + break; + } + is_prev_unknown = is_unknown; + } + + // reverse the output since we added tokens starting from the end of the input + std::reverse(output.begin(), 
output.end()); + } + +private: + const llama_vocab & vocab; + + // helper structure for returning normalization results + struct normalization_result { + const char * normalized; + size_t normalized_len; + size_t consumed_input; + }; + + void normalize(const std::string& input, std::string * normalized) { + normalized->clear(); + normalized->reserve(input.size() * 3); + + const std::string space = vocab.tokenizer_escape_whitespaces ? escaped_space : " "; + + bool shall_prepend_space = !vocab.tokenizer_treat_whitespace_as_suffix && vocab.tokenizer_add_space_prefix; + bool shall_append_space = vocab.tokenizer_treat_whitespace_as_suffix && vocab.tokenizer_add_space_prefix; + bool shall_merge_spaces = vocab.tokenizer_remove_extra_whitespaces; + + bool is_space_prepended = false; + bool processing_non_ws = false; + + size_t input_len = input.size(); + + for (size_t input_offset = 0; input_offset < input_len; ) { + auto norm_res = normalize_prefix(input, input_offset); + for (size_t i = 0; i < norm_res.normalized_len; i++) { + char c = norm_res.normalized[i]; + if (c != ' ') { + if (!processing_non_ws) { + processing_non_ws = true; + if ((shall_prepend_space && !is_space_prepended) || shall_merge_spaces) { + normalized->append(space); + is_space_prepended = true; + } + } + normalized->push_back(c); + } else { + if (processing_non_ws) { + processing_non_ws = false; + } + if (!shall_merge_spaces) { + normalized->append(space); + } + } + } + + input_offset += norm_res.consumed_input; + } + + if (shall_append_space) { + normalized->append(space); + } + } + + /* + * This structure is a view wrapper for XOR-compressed double array (XCDA) + * See Shunsuke Kanda (2018). Space- and Time-Efficient String Dictionaries. + * Eeach bit-packed entry contains: + * - BASE array value in bits 10-30 + * - LCHECK array value in bits 0-7 + * - LEAF array value in bit 9 + * Entries containing indexes of replacement sequences have set bit 31 + */ + struct xcda_array_view { + public: + xcda_array_view(const uint32_t * xcda_array, size_t xcda_array_size) : xcda_array(xcda_array), xcda_array_size(xcda_array_size) { + } + uint32_t get_base(size_t index) { + uint32_t packed_node = get_node(index); + return (packed_node >> 10) << ((packed_node & (1U << 9)) >> 6); + } + uint32_t get_lcheck(size_t index) { + uint32_t packed_node = get_node(index); + return packed_node & ((1U << 31) | 0xff); + } + bool get_leaf(size_t index) { + uint32_t packed_node = get_node(index); + return (packed_node >> 8) & 1; + } + uint32_t get_value(size_t index) { + uint32_t packed_node = get_node(index); + return packed_node & ((1U << 31) - 1); + } + private: + uint32_t get_node(size_t index) { + if (index > xcda_array_size) { + throw std::runtime_error("Index out of array bounds in XCDA array!"); + } + return xcda_array[index]; + } + const uint32_t * xcda_array; + size_t xcda_array_size; + }; + + struct normalization_result normalize_prefix(const std::string & input, size_t input_offset) { + if (input_offset == input.size()) { + return { &input[input_offset], 0, 0 }; + } + + // if input prefix matches some user-defined token return this token as normalization result + auto user_defined_token_match = user_defined_token_matcher.get_longest_prefix(&input[input_offset], input.size() - input_offset); + if (user_defined_token_match.second > 0) { + return { &input[input_offset], user_defined_token_match.second, user_defined_token_match.second }; + } + + size_t longest_prefix_length = 0; + size_t longest_prefix_offset = 0; + + if (xcda_array_size > 0) { + 
struct xcda_array_view xcda_view(xcda_array, xcda_array_size); + + // Find the longest normalized sequence matching the input prefix by walking + // the XOR-compressed compact double array (XCDA) starting from the root node + // We find the index of the next node by calculating BASE[s] ^ c where s is + // the index of the previous node and c is a numerical character value + uint32_t node_index = 0; + // get BASE of the root node + node_index = xcda_view.get_base(node_index); + for (size_t prefix_offset = input_offset; prefix_offset < input.size(); prefix_offset++) { + unsigned char c = input[prefix_offset]; + if (c == 0) { + break; + } + node_index ^= c; + // if value of LCHECK is not c it means that this is not a child of + // the previous node, so we stop matching + if (xcda_view.get_lcheck(node_index) != c) { + break; + } + bool is_leaf = xcda_view.get_leaf(node_index); + // get BASE of the current node + node_index ^= xcda_view.get_base(node_index); + // if LEAF of the current node is true, it means that its BASE points to the node + // containing index of replacement sequence for currently matched input prefix + if (is_leaf) + { + longest_prefix_length = prefix_offset - input_offset + 1; + // get index of replacement sequence for currently matched input prefix + longest_prefix_offset = xcda_view.get_value(node_index); + } + } + } + + if (longest_prefix_length > 0) { + // we have a match, so return the replacement sequence + if (longest_prefix_offset >= prefix_replacements_size) { + throw std::runtime_error("Index out of array bounds in precompiled charsmap!"); + } + const char * prefix_replacement = &prefix_replacements[longest_prefix_offset]; + return { prefix_replacement, strlen(prefix_replacement), longest_prefix_length }; + } else { + // check if the input prefix contains a valid sequence of UTF-8 code units + try { + // if yes, return this sequence unmodified + size_t prefix_offset = input_offset; + unicode_cpt_from_utf8(input, prefix_offset); + return { &input[input_offset], prefix_offset - input_offset, prefix_offset - input_offset }; + } catch(std::invalid_argument & ex) { + // if no, consume 1 byte and return U+FFFD - REPLACEMENT CHARACTER + return { "\xEF\xBF\xBD", 3, 1 }; + } + } + } + + // escaped space symbol - U+2581 (Lower One Eighth Block) + const std::string escaped_space = "\xE2\x96\x81"; + + const char * prefix_replacements = NULL; + size_t prefix_replacements_size = 0; + + const uint32_t * xcda_array = NULL; + size_t xcda_array_size = 0; + + struct naive_trie user_defined_token_matcher; + + // this structure stores the best tokenization so far at input_offset + struct best_tokenization { + llama_token token_id; + size_t input_offset; + float score_sum; + }; + + float min_score = FLT_MAX; + float max_score = -FLT_MAX; + + float unknown_token_score_penalty = 10.0; + float unknown_token_score; + + struct naive_trie token_matcher; +}; + + typedef enum FRAGMENT_BUFFER_VARIANT_TYPE { FRAGMENT_BUFFER_VARIANT_TYPE_TOKEN, FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT @@ -14086,6 +14592,39 @@ static std::vector llama_tokenize_internal(const llama_vocab & output.push_back(vocab.special_sep_id); } } break; + case LLAMA_VOCAB_TYPE_UGM: + { + llm_tokenizer_ugm tokenizer(vocab); + + if (add_special && vocab.tokenizer_add_bos != 0) { + GGML_ASSERT(vocab.special_bos_id != -1); + output.push_back(vocab.special_bos_id); + } + + for (const auto & fragment : fragment_buffer) { + if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) { + auto raw_text = 
fragment.raw_text.substr(fragment.offset, fragment.length); +#ifdef PRETOKENIZERDEBUG + LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str()); +#endif + tokenizer.tokenize(raw_text, output); + } else { // if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_TOKEN) + output.push_back(fragment.token); + } + } + + if (add_special && vocab.tokenizer_add_bos != 0 && output.size() >= 2 && output[1] == vocab.special_bos_id) { + LLAMA_LOG_WARN( + "%s: Added a BOS token to the prompt as specified by the model but the prompt " + "also starts with a BOS token. So now the final prompt starts with 2 BOS tokens. " + "Are you sure this is what you want?\n", __FUNCTION__); + } + + if (add_special && vocab.tokenizer_add_eos == 1) { + GGML_ASSERT(vocab.special_eos_id != -1); + output.push_back(vocab.special_eos_id); + } + } break; case LLAMA_VOCAB_TYPE_NONE: GGML_ASSERT(false); } @@ -16964,6 +17503,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) { case LLM_ARCH_BLOOM: case LLM_ARCH_MAMBA: case LLM_ARCH_JINA_BERT_V2: + case LLM_ARCH_T5: return LLAMA_ROPE_TYPE_NONE; // use what we call a normal RoPE, operating on pairs of consecutive head values @@ -18659,6 +19199,10 @@ llama_token llama_token_eot(const struct llama_model * model) { return model->vocab.special_eot_id; } +llama_token llama_token_pad(const struct llama_model * model) { + return model->vocab.special_pad_id; +} + int32_t llama_tokenize( const struct llama_model * model, const char * text, @@ -18725,7 +19269,8 @@ int32_t llama_token_to_piece(const struct llama_model * model, llama_token token if (0 <= token && token < llama_n_vocab(model)) { switch (llama_vocab_get_type(model->vocab)) { case LLAMA_VOCAB_TYPE_WPM: - case LLAMA_VOCAB_TYPE_SPM: { + case LLAMA_VOCAB_TYPE_SPM: + case LLAMA_VOCAB_TYPE_UGM: { // NOTE: we accept all unsupported token types, // suppressing them like CONTROL tokens. if (llama_is_normal_token(model->vocab, token)) { diff --git a/llama.h b/llama.h index 82d15747f4662..88eecb0edb17e 100644 --- a/llama.h +++ b/llama.h @@ -67,6 +67,7 @@ extern "C" { LLAMA_VOCAB_TYPE_SPM = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback LLAMA_VOCAB_TYPE_BPE = 2, // GPT-2 tokenizer based on byte-level BPE LLAMA_VOCAB_TYPE_WPM = 3, // BERT tokenizer based on WordPiece + LLAMA_VOCAB_TYPE_UGM = 4, // T5 tokenizer based on Unigram }; // pre-tokenization types @@ -857,6 +858,7 @@ extern "C" { LLAMA_API llama_token llama_token_cls(const struct llama_model * model); // classification LLAMA_API llama_token llama_token_sep(const struct llama_model * model); // sentence separator LLAMA_API llama_token llama_token_nl (const struct llama_model * model); // next-line + LLAMA_API llama_token llama_token_pad(const struct llama_model * model); // padding // Returns -1 if unknown, 1 for true or 0 for false. 
LLAMA_API int32_t llama_add_bos_token(const struct llama_model * model); diff --git a/unicode.cpp b/unicode.cpp index c0b76bf20aede..8692924b957cc 100644 --- a/unicode.cpp +++ b/unicode.cpp @@ -23,7 +23,7 @@ static std::string unicode_cpts_to_utf8(const std::vector & cps) { return result; } -static uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) { +uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) { assert(offset < utf8.size()); if (!(utf8[offset + 0] & 0x80)) { auto result = utf8[offset + 0]; diff --git a/unicode.h b/unicode.h index 6c488970a79d6..30b07ba7fa493 100644 --- a/unicode.h +++ b/unicode.h @@ -48,6 +48,7 @@ struct codepoint_flags { std::string unicode_cpt_to_utf8(uint32_t cp); +uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset); std::vector unicode_cpts_from_utf8(const std::string & utf8); std::vector unicode_cpts_normalize_nfd(const std::vector & cpts); From 163d50adaf8897d8b734d701ff332de6be63d484 Mon Sep 17 00:00:00 2001 From: jukofyork <69222624+jukofyork@users.noreply.github.com> Date: Tue, 25 Jun 2024 21:47:40 +0100 Subject: [PATCH 13/15] fixes #7999 (adds control vectors to all `build_XXX()` functions in `llama.cpp` [needs testing] (#8060) * fixes #7999 The `build_command_r` forgot to add the control vector. * Fixes qwen2 too * Fixed all models' control vectors * Removed double calls to `cb(cur, "l_out", il)` * Moved control vector logic to llama_control_vector:apply_to() --- llama.cpp | 112 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 73 insertions(+), 39 deletions(-) diff --git a/llama.cpp b/llama.cpp index 78a21008f9338..989c731495dbb 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2368,13 +2368,21 @@ struct llama_control_vector { int32_t layer_start = -1; int32_t layer_end = -1; - ggml_tensor * tensor_for(int il) const { + struct ggml_tensor * tensor_for(int il) const { if (il < 0 || il < layer_start || il > layer_end || (size_t) il >= tensors.size()) { return nullptr; } return tensors[il]; } + struct ggml_tensor * apply_to(struct ggml_context * ctx, struct ggml_tensor * cur, int il) const { + ggml_tensor * layer_dir = tensor_for(il); + if (layer_dir != nullptr) { + cur = ggml_add(ctx, cur, layer_dir); + } + return cur; + } + ~llama_control_vector() { for (struct ggml_context * ctx : ctxs) { ggml_free(ctx); @@ -8023,10 +8031,7 @@ struct llm_build_context { cur = ggml_add(ctx0, cur, ffn_inp); cb(cur, "ffn_out", il); - ggml_tensor * layer_dir = lctx.cvec.tensor_for(il); - if (layer_dir != nullptr) { - cur = ggml_add(ctx0, cur, layer_dir); - } + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -8141,6 +8146,7 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -8245,6 +8251,7 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -8360,9 +8367,8 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_inp); - cb(cur, "l_out", il); - cur = ggml_add(ctx0, cur, inpL); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -8514,10 +8520,7 @@ struct llm_build_context { cur = ggml_add(ctx0, cur, ffn_inp); cb(cur, "ffn_out", il); - ggml_tensor * layer_dir = lctx.cvec.tensor_for(il); - if (layer_dir != nullptr) { - cur = ggml_add(ctx0, cur, layer_dir); - } + cur = lctx.cvec.apply_to(ctx0, cur, 
il); cb(cur, "l_out", il); // input for next layer @@ -8648,10 +8651,7 @@ struct llm_build_context { cur = ggml_add(ctx0, cur, ffn_inp); cb(cur, "ffn_out", il); - ggml_tensor * layer_dir = lctx.cvec.tensor_for(il); - if (layer_dir != nullptr) { - cur = ggml_add(ctx0, cur, layer_dir); - } + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -8757,8 +8757,12 @@ struct llm_build_context { cb(cur, "ffn_out", il); } - inpL = ggml_add(ctx0, cur, ffn_inp); - cb(inpL, "l_out", il); + cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; } cur = llm_build_norm(ctx0, inpL, hparams, @@ -8846,6 +8850,7 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -9141,8 +9146,12 @@ struct llm_build_context { cb(cur, "ffn_out", il); } - inpL = ggml_add(ctx0, cur, ffn_inp); - cb(inpL, "l_out", il); + cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; } cur = llm_build_norm(ctx0, inpL, hparams, @@ -9276,6 +9285,7 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -9424,6 +9434,7 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -9536,6 +9547,7 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -9647,6 +9659,7 @@ struct llm_build_context { cb(cur, "ffn_out", il); cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -9792,6 +9805,7 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -9912,11 +9926,11 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_output); - cb(cur, "l_out", il); - cur = ggml_add(ctx0, cur, inpL); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); + // input for next layer inpL = cur; } @@ -10048,8 +10062,10 @@ struct llm_build_context { } cur = ggml_add(ctx0, residual, cur); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); + // input for next layer inpL = cur; } @@ -10148,9 +10164,8 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, sa_out); - cb(cur, "l_out", il); - cur = ggml_add(ctx0, cur, inpL); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -10256,8 +10271,12 @@ struct llm_build_context { cb(cur, "ffn_out", il); } - inpL = ggml_add(ctx0, cur, ffn_inp); - cb(inpL, "l_out", il); + cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; } cur = llm_build_norm(ctx0, inpL, hparams, @@ -10363,8 +10382,12 @@ struct llm_build_context { cb(cur, "ffn_out", il); } - inpL = ggml_add(ctx0, cur, ffn_inp); - cb(inpL, "l_out", il); + cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; } cur = llm_build_norm(ctx0, inpL, hparams, @@ -10476,6 +10499,7 @@ struct llm_build_context { cb(cur, "ffn_out", il); cur = 
ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -10593,6 +10617,7 @@ struct llm_build_context { cb(cur, "ffn_out", il); cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -10734,6 +10759,7 @@ struct llm_build_context { cb(cur, "hidden_scaled_ffn", -1); cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -10846,6 +10872,7 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, sa_out); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -10962,7 +10989,9 @@ struct llm_build_context { NULL, LLM_FFN_GELU, LLM_FFN_SEQ, cb, il); cb(cur, "ffn_out", il); + cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -11111,6 +11140,7 @@ struct llm_build_context { // residual cur = ggml_add(ctx0, cur, inpL); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -11252,6 +11282,7 @@ struct llm_build_context { // add together residual + FFN + self-attention cur = ggml_add(ctx0, cur, inpL); cur = ggml_add(ctx0, cur, attn_out); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -11387,10 +11418,7 @@ struct llm_build_context { cur = ggml_add(ctx0, cur, ffn_inp); cb(cur, "ffn_out", il); - ggml_tensor * layer_dir = lctx.cvec.tensor_for(il); - if (layer_dir != nullptr) { - cur = ggml_add(ctx0, cur, layer_dir); - } + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -11504,8 +11532,12 @@ struct llm_build_context { cur = ggml_add(ctx0, cur, inpL); cb(cur, "ffn_out", il); - inpL = ggml_add(ctx0, cur, attn_out); - cb(inpL, "l_out", il); + cur = ggml_add(ctx0, cur, attn_out); + cur = lctx.cvec.apply_to(ctx0, cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; } else { // attention and ffn are computed sequentially // x = x + attn(ln1(x)) @@ -11528,8 +11560,12 @@ struct llm_build_context { LLM_FFN_GELU, LLM_FFN_SEQ, cb, il); cb(cur, "ffn_out", il); - inpL = ggml_add(ctx0, cur, ffn_inp); - cb(inpL, "l_out", il); + cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; } } @@ -11656,10 +11692,7 @@ struct llm_build_context { cur = ggml_add(ctx0, cur, ffn_out); cb(cur, "ffn_out", il); - ggml_tensor * layer_dir = lctx.cvec.tensor_for(il); - if (layer_dir != nullptr) { - cur = ggml_add(ctx0, cur, layer_dir); - } + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer @@ -11892,6 +11925,7 @@ struct llm_build_context { } cur = ggml_add(ctx0, cur, ffn_inp); + cur = lctx.cvec.apply_to(ctx0, cur, il); cb(cur, "l_out", il); // input for next layer From 6777c544bdd8c5d9de3220d6e2557957bbbf2a4f Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Wed, 26 Jun 2024 01:45:58 +0100 Subject: [PATCH 14/15] `json`: fix additionalProperties, allow space after enum/const (#7840) * json: default additionalProperty to true * json: don't force additional props after normal properties! 
* json: allow space after enum/const * json: update pydantic example to set additionalProperties: false * json: prevent additional props to redefine a typed prop * port not_strings to python, add trailing space * fix not_strings & port to js+py * Update json-schema-to-grammar.cpp * fix _not_strings for substring overlaps * json: fix additionalProperties default, uncomment tests * json: add integ. test case for additionalProperties * json: nit: simplify condition * reformat grammar integ tests w/ R"""()""" strings where there's escapes * update # tokens in server test: consts can now have trailing space --- common/json-schema-to-grammar.cpp | 99 +++++- examples/json-schema-pydantic-example.py | 6 +- examples/json_schema_to_grammar.py | 76 ++++- .../server/public/json-schema-to-grammar.mjs | 89 ++++- examples/server/tests/features/server.feature | 2 +- tests/test-grammar-integration.cpp | 320 ++++++++---------- tests/test-json-schema-to-grammar.cpp | 150 ++++++-- 7 files changed, 497 insertions(+), 245 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 07d0e952d74cf..b40821dadc05e 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -614,6 +614,75 @@ class SchemaConverter { return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space"); } + /* + Returns a rule that matches a JSON string that is none of the provided strings + + not_strings({"a"}) + -> ["] ( [a] char+ | [^"a] char* )? ["] space + not_strings({"and", "also"}) + -> ["] ( [a] ([l] ([s] ([o] char+ | [^"o] char*) | [^"s] char*) | [n] ([d] char+ | [^"d] char*) | [^"ln] char*) | [^"a] char* )? ["] space + */ + std::string _not_strings(const std::vector & strings) { + + struct TrieNode { + std::map children; + bool is_end_of_string; + + TrieNode() : is_end_of_string(false) {} + + void insert(const std::string & string) { + auto node = this; + for (char c : string) { + node = &node->children[c]; + } + node->is_end_of_string = true; + } + }; + + TrieNode trie; + for (const auto & s : strings) { + trie.insert(s); + } + + std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char")); + std::ostringstream out; + out << "[\"] ( "; + std::function visit = [&](const TrieNode & node) { + std::ostringstream rejects; + auto first = true; + for (const auto & kv : node.children) { + rejects << kv.first; + if (first) { + first = false; + } else { + out << " | "; + } + out << "[" << kv.first << "]"; + if (!kv.second.children.empty()) { + out << " ("; + visit(kv.second); + out << ")"; + } else if (kv.second.is_end_of_string) { + out << " " << char_rule << "+"; + } + } + if (!node.children.empty()) { + if (!first) { + out << " | "; + } + out << "[^\"" << rejects.str() << "] " << char_rule << "*"; + } + }; + visit(trie); + + out << " )"; + if (!trie.is_end_of_string) { + out << "?"; + } + out << " [\"] space"; + return out.str(); + } + std::string _resolve_ref(const std::string & ref) { std::string ref_name = ref.substr(ref.find_last_of('/') + 1); if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) { @@ -634,6 +703,7 @@ class SchemaConverter { std::vector required_props; std::vector optional_props; std::unordered_map prop_kv_rule_names; + std::vector prop_names; for (const auto & kv : properties) { const auto &prop_name = kv.first; const auto &prop_schema = kv.second; @@ -648,11 +718,18 @@ class SchemaConverter { } else { optional_props.push_back(prop_name); } + 
prop_names.push_back(prop_name); } - if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get())) { + if (!(additional_properties.is_boolean() && !additional_properties.get())) { std::string sub_name = name + (name.empty() ? "" : "-") + "additional"; - std::string value_rule = visit(additional_properties.is_object() ? additional_properties : json::object(), sub_name + "-value"); - std::string kv_rule = _add_rule(sub_name + "-kv", _add_primitive("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule); + std::string value_rule = + additional_properties.is_object() ? visit(additional_properties, sub_name + "-value") + : _add_primitive("value", PRIMITIVE_RULES.at("value")); + + auto key_rule = + prop_names.empty() ? _add_primitive("string", PRIMITIVE_RULES.at("string")) + : _add_rule(sub_name + "-k", _not_strings(prop_names)); + std::string kv_rule = _add_rule(sub_name + "-kv", key_rule + " \":\" space " + value_rule); prop_kv_rule_names["*"] = kv_rule; optional_props.push_back("*"); } @@ -678,15 +755,11 @@ class SchemaConverter { } std::string k = ks[0]; std::string kv_rule_name = prop_kv_rule_names[k]; - if (k == "*") { - res = _add_rule( - name + (name.empty() ? "" : "-") + "additional-kvs", - kv_rule_name + " ( \",\" space " + kv_rule_name + " )*" - ); - } else if (first_is_optional) { - res = "( \",\" space " + kv_rule_name + " )?"; + std::string comma_ref = "( \",\" space " + kv_rule_name + " )"; + if (first_is_optional) { + res = comma_ref + (k == "*" ? "*" : "?"); } else { - res = kv_rule_name; + res = kv_rule_name + (k == "*" ? " " + comma_ref + "*" : ""); } if (ks.size() > 1) { res += " " + _add_rule( @@ -824,13 +897,13 @@ class SchemaConverter { } return _add_rule(rule_name, _generate_union_rule(name, schema_types)); } else if (schema.contains("const")) { - return _add_rule(rule_name, _generate_constant_rule(schema["const"])); + return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space"); } else if (schema.contains("enum")) { std::vector enum_values; for (const auto & v : schema["enum"]) { enum_values.push_back(_generate_constant_rule(v)); } - return _add_rule(rule_name, join(enum_values.begin(), enum_values.end(), " | ")); + return _add_rule(rule_name, "(" + join(enum_values.begin(), enum_values.end(), " | ") + ") space"); } else if ((schema_type.is_null() || schema_type == "object") && (schema.contains("properties") || (schema.contains("additionalProperties") && schema["additionalProperties"] != true))) { diff --git a/examples/json-schema-pydantic-example.py b/examples/json-schema-pydantic-example.py index 2240188cd031e..2a24f81189fb1 100644 --- a/examples/json-schema-pydantic-example.py +++ b/examples/json-schema-pydantic-example.py @@ -3,7 +3,7 @@ #! pip install pydantic #! 
python json-schema-pydantic-example.py -from pydantic import BaseModel, TypeAdapter +from pydantic import BaseModel, Extra, TypeAdapter from annotated_types import MinLen from typing import Annotated, List, Optional import json, requests @@ -50,12 +50,16 @@ def create_completion(*, response_model=None, endpoint="http://localhost:8080/v1 if __name__ == '__main__': class QAPair(BaseModel): + class Config: + extra = 'forbid' # triggers additionalProperties: false in the JSON schema question: str concise_answer: str justification: str stars: Annotated[int, Field(ge=1, le=5)] class PyramidalSummary(BaseModel): + class Config: + extra = 'forbid' # triggers additionalProperties: false in the JSON schema title: str summary: str question_answers: Annotated[List[QAPair], MinLen(2)] diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index 86500a8c3c238..3f3132f88a824 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -4,8 +4,7 @@ import json import re import sys -from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union - +from typing import Any, List, Optional, Set, Tuple, Union def _build_repetition(item_rule, min_items, max_items, separator_rule=None): @@ -276,6 +275,51 @@ def recurse(i: int): return ''.join(('(', *recurse(0), ')')) + def _not_strings(self, strings): + class TrieNode: + def __init__(self): + self.children = {} + self.is_end_of_string = False + + def insert(self, string): + node = self + for c in string: + node = node.children.setdefault(c, TrieNode()) + node.is_end_of_string = True + + trie = TrieNode() + for s in strings: + trie.insert(s) + + char_rule = self._add_primitive('char', PRIMITIVE_RULES['char']) + out = ['["] ( '] + + def visit(node): + rejects = [] + first = True + for c in sorted(node.children.keys()): + child = node.children[c] + rejects.append(c) + if first: + first = False + else: + out.append(' | ') + out.append(f'[{c}]') + if child.children: + out.append(f' (') + visit(child) + out.append(')') + elif child.is_end_of_string: + out.append(f' {char_rule}+') + if node.children: + if not first: + out.append(' | ') + out.append(f'[^"{"".join(rejects)}] {char_rule}*') + visit(trie) + + out.append(f' ){"" if trie.is_end_of_string else "?"} ["] space') + return ''.join(out) + def _add_rule(self, name, rule): esc_name = INVALID_RULE_CHARS_RE.sub('-', name) if esc_name not in self._rules or self._rules[esc_name] == rule: @@ -524,10 +568,10 @@ def visit(self, schema, name): return self._add_rule(rule_name, self._generate_union_rule(name, [{'type': t} for t in schema_type])) elif 'const' in schema: - return self._add_rule(rule_name, self._generate_constant_rule(schema['const'])) + return self._add_rule(rule_name, self._generate_constant_rule(schema['const']) + ' space') elif 'enum' in schema: - rule = ' | '.join((self._generate_constant_rule(v) for v in schema['enum'])) + rule = '(' + ' | '.join((self._generate_constant_rule(v) for v in schema['enum'])) + ') space' return self._add_rule(rule_name, rule) elif schema_type in (None, 'object') and \ @@ -632,7 +676,7 @@ def _add_primitive(self, name: str, rule: BuiltinRule): self._add_primitive(dep, dep_rule) return n - def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]): + def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Optional[Union[bool, Any]]): prop_order = self._prop_order # sort by 
position in prop_order (if specified) then by original order sorted_props = [kv[0] for _, kv in sorted(enumerate(properties), key=lambda ikv: (prop_order.get(ikv[1][0], len(prop_order)), ikv[0]))] @@ -647,12 +691,16 @@ def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[st required_props = [k for k in sorted_props if k in required] optional_props = [k for k in sorted_props if k not in required] - if additional_properties == True or isinstance(additional_properties, dict): + if additional_properties != False: sub_name = f'{name}{"-" if name else ""}additional' - value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value') + value_rule = self.visit(additional_properties, f'{sub_name}-value') if isinstance(additional_properties, dict) else \ + self._add_primitive('value', PRIMITIVE_RULES['value']) + key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not sorted_props \ + else self._add_rule(f'{sub_name}-k', self._not_strings(sorted_props)) + prop_kv_rule_names["*"] = self._add_rule( f'{sub_name}-kv', - self._add_primitive('string', PRIMITIVE_RULES['string']) + f' ":" space {value_rule}' + f'{key_rule} ":" space {value_rule}' ) optional_props.append("*") @@ -667,15 +715,11 @@ def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[st def get_recursive_refs(ks, first_is_optional): [k, *rest] = ks kv_rule_name = prop_kv_rule_names[k] - if k == '*': - res = self._add_rule( - f'{name}{"-" if name else ""}additional-kvs', - f'{kv_rule_name} ( "," space ' + kv_rule_name + ' )*' - ) - elif first_is_optional: - res = f'( "," space {kv_rule_name} )?' + comma_ref = f'( "," space {kv_rule_name} )' + if first_is_optional: + res = comma_ref + ('*' if k == '*' else '?') else: - res = kv_rule_name + res = kv_rule_name + (' ' + comma_ref + "*" if k == '*' else '') if len(rest) > 0: res += ' ' + self._add_rule( f'{name}{"-" if name else ""}{k}-rest', diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs index f340f94bd75bc..02015bbd49015 100644 --- a/examples/server/public/json-schema-to-grammar.mjs +++ b/examples/server/public/json-schema-to-grammar.mjs @@ -532,6 +532,64 @@ export class SchemaConverter { return this._addRule(name, "\"\\\"\" " + toRule(transform()) + " \"\\\"\" space") } + _notStrings(strings) { + class TrieNode { + constructor() { + this.children = {}; + this.isEndOfString = false; + } + + insert(str) { + let node = this; + for (const c of str) { + node = node.children[c] = node.children[c] || new TrieNode(); + } + node.isEndOfString = true; + } + } + + const trie = new TrieNode(); + for (const s of strings) { + trie.insert(s); + } + + const charRuleName = this._addPrimitive('char', PRIMITIVE_RULES['char']); + const out = ['["] ( ']; + + const visit = (node) => { + const rejects = []; + let first = true; + for (const c of Object.keys(node.children).sort()) { + const child = node.children[c]; + rejects.push(c); + if (first) { + first = false; + } else { + out.push(' | '); + } + out.push(`[${c}]`); + if (Object.keys(child.children).length > 0) { + out.push(' ('); + visit(child); + out.push(')'); + } else if (child.isEndOfString) { + out.push(` ${charRuleName}+`); + } + } + if (Object.keys(node.children).length > 0) { + if (!first) { + out.push(' | '); + } + out.push(`[^"${rejects.join('')}] ${charRuleName}*`); + } + }; + + visit(trie); + + out.push(` )${trie.isEndOfString ? 
'' : '?'} ["] space`); + return out.join(''); + } + _resolveRef(ref) { let refName = ref.split('/').pop(); if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) { @@ -560,9 +618,9 @@ export class SchemaConverter { } else if (Array.isArray(schemaType)) { return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({ type: t })))); } else if ('const' in schema) { - return this._addRule(ruleName, this._generateConstantRule(schema.const)); + return this._addRule(ruleName, this._generateConstantRule(schema.const) + ' space'); } else if ('enum' in schema) { - const rule = schema.enum.map(v => this._generateConstantRule(v)).join(' | '); + const rule = '(' + schema.enum.map(v => this._generateConstantRule(v)).join(' | ') + ') space'; return this._addRule(ruleName, rule); } else if ((schemaType === undefined || schemaType === 'object') && ('properties' in schema || @@ -599,7 +657,7 @@ export class SchemaConverter { } } - return this._addRule(ruleName, this._buildObjectRule(properties, required, name, /* additionalProperties= */ false)); + return this._addRule(ruleName, this._buildObjectRule(properties, required, name, null)); } else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) { const items = schema.items ?? schema.prefixItems; if (Array.isArray(items)) { @@ -693,12 +751,19 @@ export class SchemaConverter { const requiredProps = sortedProps.filter(k => required.has(k)); const optionalProps = sortedProps.filter(k => !required.has(k)); - if (typeof additionalProperties === 'object' || additionalProperties === true) { + if (additionalProperties !== false) { const subName = `${name ?? ''}${name ? '-' : ''}additional`; - const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`); + const valueRule = + additionalProperties != null && typeof additionalProperties === 'object' ? this.visit(additionalProperties, `${subName}-value`) + : this._addPrimitive('value', PRIMITIVE_RULES['value']); + + const key_rule = + sortedProps.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string']) + : this._addRule(`${subName}-k`, this._notStrings(sortedProps)); + propKvRuleNames['*'] = this._addRule( `${subName}-kv`, - `${this._addPrimitive('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`); + `${key_rule} ":" space ${valueRule}`); optionalProps.push('*'); } @@ -715,15 +780,11 @@ export class SchemaConverter { const [k, ...rest] = ks; const kvRuleName = propKvRuleNames[k]; let res; - if (k === '*') { - res = this._addRule( - `${name ?? ''}${name ? '-' : ''}additional-kvs`, - `${kvRuleName} ( "," space ` + kvRuleName + ` )*` - ) - } else if (firstIsOptional) { - res = `( "," space ${kvRuleName} )?`; + const commaRef = `( "," space ${kvRuleName} )`; + if (firstIsOptional) { + res = commaRef + (k === '*' ? '*' : '?'); } else { - res = kvRuleName; + res = kvRuleName + (k === '*' ? 
' ' + commaRef + '*' : ''); } if (rest.length > 0) { res += ' ' + this._addRule( diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature index d21c09135243a..b55971454afc3 100644 --- a/examples/server/tests/features/server.feature +++ b/examples/server/tests/features/server.feature @@ -82,7 +82,7 @@ Feature: llama.cpp server Examples: Prompts | response_format | n_predicted | re_content | - | {"type": "json_object", "schema": {"const": "42"}} | 5 | "42" | + | {"type": "json_object", "schema": {"const": "42"}} | 6 | "42" | | {"type": "json_object", "schema": {"items": [{"type": "integer"}]}} | 10 | \[ -300 \] | | {"type": "json_object"} | 10 | \{ " Jacky. | diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 5750d362a7247..23ef8324c1327 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -15,8 +15,6 @@ using json = nlohmann::ordered_json; -//#define INCLUDE_FAILING_TESTS 1 - static llama_grammar* build_grammar(const std::string & grammar_str) { auto parsed_grammar = grammar_parser::parse(grammar_str.c_str()); @@ -754,7 +752,7 @@ static void test_json_schema() { )""", // Passing strings { - "{}", + R"""({})""", R"""({"foo": "bar"})""", }, // Failing strings @@ -762,7 +760,7 @@ static void test_json_schema() { "", "[]", "null", - "\"\"", + R"""("")""", "true", } ); @@ -770,16 +768,14 @@ static void test_json_schema() { test_schema( "exotic formats (list)", // Schema - R"""( - { + R"""({ "items": [ { "format": "date" }, { "format": "uuid" }, { "format": "time" }, { "format": "date-time" } ] - } - )""", + })""", // Passing strings { // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it? 
@@ -798,125 +794,113 @@ static void test_json_schema() { test_schema( "string", // Schema - R"""( - { - "type": "string" - } - )""", + R"""({ + "type": "string" + })""", // Passing strings { - "\"foo\"", - "\"bar\"", - "\"\"", + R"""("foo")""", + R"""("bar")""", + R"""("")""", }, // Failing strings { - "{}", - "\"foo\": \"bar\"", + R"""({})""", + R"""("foo": "bar")""", } ); test_schema( "string w/ min length 1", // Schema - R"""( - { - "type": "string", - "minLength": 1 - } - )""", + R"""({ + "type": "string", + "minLength": 1 + })""", // Passing strings { - "\"foo\"", - "\"bar\"", + R"""("foo")""", + R"""("bar")""", }, // Failing strings { - "\"\"", - "{}", - "\"foo\": \"bar\"", + R"""("")""", + R"""({})""", + R"""("foo": "bar")""", } ); test_schema( "string w/ min length 3", // Schema - R"""( - { + R"""({ "type": "string", "minLength": 3 - } - )""", + })""", // Passing strings { - "\"foo\"", - "\"bar\"", - "\"foobar\"", + R"""("foo")""", + R"""("bar")""", + R"""("foobar")""", }, // Failing strings { - "\"\"", - "\"f\"", - "\"fo\"", + R"""("")""", + R"""("f")""", + R"""("fo")""", } ); test_schema( "string w/ max length", // Schema - R"""( - { - "type": "string", - "maxLength": 3 - } - )""", + R"""({ + "type": "string", + "maxLength": 3 + })""", // Passing strings { - "\"foo\"", - "\"bar\"", - "\"\"", - "\"f\"", - "\"fo\"", + R"""("foo")""", + R"""("bar")""", + R"""("")""", + R"""("f")""", + R"""("fo")""", }, // Failing strings { - "\"foobar\"", + R"""("foobar")""", } ); test_schema( "string w/ min & max length", // Schema - R"""( - { - "type": "string", - "minLength": 1, - "maxLength": 4 - } - )""", + R"""({ + "type": "string", + "minLength": 1, + "maxLength": 4 + })""", // Passing strings { - "\"foo\"", - "\"bar\"", - "\"f\"", - "\"barf\"", + R"""("foo")""", + R"""("bar")""", + R"""("f")""", + R"""("barf")""", }, // Failing strings { - "\"\"", - "\"barfo\"", - "\"foobar\"", + R"""("")""", + R"""("barfo")""", + R"""("foobar")""", } ); test_schema( "boolean", // Schema - R"""( - { - "type": "boolean" - } - )""", + R"""({ + "type": "boolean" + })""", // Passing strings { "true", @@ -924,122 +908,112 @@ static void test_json_schema() { }, // Failing strings { - "\"\"", - "\"true\"", - "True", - "FALSE", + R"""("")""", + R"""("true")""", + R"""(True)""", + R"""(FALSE)""", } ); test_schema( "integer", // Schema - R"""( - { - "type": "integer" - } - )""", + R"""({ + "type": "integer" + })""", // Passing strings { - "0", - "12345", - "1234567890123456" + R"""(0)""", + R"""(12345)""", + R"""(1234567890123456)""", }, // Failing strings { - "", - "01", - "007", - "12345678901234567" + R"""()""", + R"""(01)""", + R"""(007)""", + R"""(12345678901234567 )""", } ); test_schema( "string const", // Schema - R"""( - { - "const": "foo" - } - )""", + R"""({ + "const": "foo" + })""", // Passing strings { - "\"foo\"", + R"""("foo")""", }, // Failing strings { - "foo", - "\"bar\"", + R"""(foo)""", + R"""("bar")""", } ); test_schema( "non-string const", // Schema - R"""( - { - "const": true - } - )""", + R"""({ + "const": true + })""", // Passing strings { - "true", + R"""(true)""", }, // Failing strings { - "", - "foo", - "\"true\"", + R"""()""", + R"""(foo)""", + R"""("true")""", } ); test_schema( "non-string const", // Schema - R"""( - { - "enum": ["red", "amber", "green", null, 42, ["foo"]] - } - )""", + R"""({ + "enum": ["red", "amber", "green", null, 42, ["foo"]] + })""", // Passing strings { - "\"red\"", - "null", - "42", - "[\"foo\"]", + R"""("red")""", + R"""(null)""", + R"""(42)""", + 
R"""(["foo"])""", }, // Failing strings { - "", - "420", - "true", - "foo", + R"""()""", + R"""(420)""", + R"""(true)""", + R"""(foo)""", } ); test_schema( "min+max items", // Schema - R"""( - { - "items": { - "type": ["number", "integer"] - }, - "minItems": 3, - "maxItems": 5 - } - )""", + R"""({ + "items": { + "type": ["number", "integer"] + }, + "minItems": 3, + "maxItems": 5 + })""", // Passing strings { - "[1, 2, 3]", - "[1, 2, 3, 4]", - "[1, 2, 3, 4, 5]", + R"""([1, 2, 3])""", + R"""([1, 2, 3, 4])""", + R"""([1, 2, 3, 4, 5])""", }, // Failing strings { - "[1, 2]", - "[1, 2, 3, 4, 5, 6]", - "1" + R"""([1, 2])""", + R"""([1, 2, 3, 4, 5, 6])""", + R"""(1)""", } ); @@ -1047,16 +1021,14 @@ static void test_json_schema() { test_schema( "object properties", // Schema - R"""( - { + R"""({ "type": "object", "properties": { "number": { "type": "number" }, "street_name": { "type": "string" }, "street_type": { "enum": ["Street", "Avenue", "Boulevard"] } } - } - )""", + })""", // Passing strings { R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""", @@ -1066,12 +1038,8 @@ static void test_json_schema() { // "By extension, even an empty object is valid" R"""({})""", // "By default, providing additional properties is valid" -#ifdef INCLUDE_FAILING_TESTS - // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""", - // TODO: Spaces should be permitted around enum values, but currently they fail to pass. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", -#endif }, // Failing strings { @@ -1084,13 +1052,35 @@ static void test_json_schema() { } ); + test_schema( + "additional properties can't override other properties", + R"""({ + "properties": { + "a": {"type": "integer"}, + "b": {"type": "integer"} + }, + "additionalProperties": true + })""", + // Passing strings + { + R"""({"a": 42})""", + R"""({"c": ""})""", + R"""({"a": 42, "c": ""})""", + R"""({"a_": ""})""", + }, + // Failing strings + { + R"""()""", + R"""({"a": ""})""", + R"""({"a": "", "b": ""})""", + } + ); // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties) test_schema( "object properties, additionalProperties: true", // Schema - R"""( - { + R"""({ "type": "object", "properties": { "number": { "type": "number" }, @@ -1098,26 +1088,18 @@ static void test_json_schema() { "street_type": { "enum": ["Street", "Avenue", "Boulevard"] } }, "additionalProperties": true - } - )""", + })""", // Passing strings { // "By extension, even an empty object is valid" R"""({})""", -#ifdef INCLUDE_FAILING_TESTS - // TODO: Following line should pass and doesn't R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""", // "By default, leaving out properties is valid" - // TODO: Following line should pass and doesn't R"""({ "street_name": "Pennsylvania" })""", - // TODO: Following line should pass and doesn't R"""({ "number": 1600, "street_name": "Pennsylvania" })""", // "By default, providing additional properties is valid" - // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""", - // TODO: Spaces should be permitted around enum values, but currently they fail to pass. 
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", -#endif }, // Failing strings { @@ -1132,8 +1114,7 @@ static void test_json_schema() { test_schema( "required + optional props each in original order", // Schema - R"""( - { + R"""({ "type": "object", "properties": { "number": { "type": "number" }, @@ -1141,18 +1122,15 @@ static void test_json_schema() { "street_type": { "enum": ["Street", "Avenue", "Boulevard"] } }, "additionalProperties": false - } - )""", + })""", // Passing strings { R"""({ "street_name": "Pennsylvania" })""", R"""({ "number": 1600, "street_type":"Avenue"})""", R"""({ "number": 1600, "street_name": "Pennsylvania" })""", R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""", -#ifdef INCLUDE_FAILING_TESTS - // TODO: Spaces should be permitted around enum values, but currently they fail to pass. + // Spaces are permitted around enum values R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", -#endif }, // Failing strings { @@ -1166,18 +1144,16 @@ static void test_json_schema() { test_schema( "required + optional props each in original order", // Schema - R"""( - { - "properties": { - "b": {"type": "string"}, - "a": {"type": "string"}, - "d": {"type": "string"}, - "c": {"type": "string"} - }, - "required": ["a", "b"], - "additionalProperties": false - } - )""", + R"""({ + "properties": { + "b": {"type": "string"}, + "a": {"type": "string"}, + "d": {"type": "string"}, + "c": {"type": "string"} + }, + "required": ["a", "b"], + "additionalProperties": false + })""", // Passing strings { R"""({"b": "foo", "a": "bar"})""", @@ -1197,8 +1173,7 @@ static void test_json_schema() { test_schema( "required props", // Schema - R"""( - { + R"""({ "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://example.com/product.schema.json", "title": "Product", @@ -1244,8 +1219,7 @@ static void test_json_schema() { } }, "required": [ "productId", "productName", "price" ] - } - )""", + })""", // Passing strings { R"""({"productId": 1, "productName": "A green door", "price": 12.50})""", diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index 2e591bd71abaa..1e69cb6ef36be 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -473,7 +473,7 @@ static void test_all(const std::string & lang, std::function Date: Wed, 26 Jun 2024 01:46:35 +0100 Subject: [PATCH 15/15] `json`: better support for "type" unions (e.g. nullable arrays w/ typed items) (#7863) * json: better suport for "type" arrays (e.g. 
`{"type": ["array", "null"], "items": {"type": "string"}}`)

* json: add test for type: [array, null] fix

* update tests
---
 common/json-schema-to-grammar.cpp             |  4 ++-
 examples/json_schema_to_grammar.py            |  2 +-
 .../server/public/json-schema-to-grammar.mjs  |  2 +-
 tests/test-grammar-integration.cpp            | 25 +++++++++++++++
 tests/test-json-schema-to-grammar.cpp         | 32 +++++++++++++++++++
 5 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp
index b40821dadc05e..2f233e2e7477f 100644
--- a/common/json-schema-to-grammar.cpp
+++ b/common/json-schema-to-grammar.cpp
@@ -893,7 +893,9 @@ class SchemaConverter {
         } else if (schema_type.is_array()) {
             std::vector<json> schema_types;
             for (const auto & t : schema_type) {
-                schema_types.push_back({{"type", t}});
+                json schema_copy(schema);
+                schema_copy["type"] = t;
+                schema_types.push_back(schema_copy);
             }
             return _add_rule(rule_name, _generate_union_rule(name, schema_types));
         } else if (schema.contains("const")) {
diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py
index 3f3132f88a824..92f6e3d47bae7 100755
--- a/examples/json_schema_to_grammar.py
+++ b/examples/json_schema_to_grammar.py
@@ -565,7 +565,7 @@ def visit(self, schema, name):
             return self._add_rule(rule_name, self._generate_union_rule(name, schema.get('oneOf') or schema['anyOf']))

         elif isinstance(schema_type, list):
-            return self._add_rule(rule_name, self._generate_union_rule(name, [{'type': t} for t in schema_type]))
+            return self._add_rule(rule_name, self._generate_union_rule(name, [{**schema, 'type': t} for t in schema_type]))

         elif 'const' in schema:
             return self._add_rule(rule_name, self._generate_constant_rule(schema['const']) + ' space')
diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs
index 02015bbd49015..06d76edde500a 100644
--- a/examples/server/public/json-schema-to-grammar.mjs
+++ b/examples/server/public/json-schema-to-grammar.mjs
@@ -616,7 +616,7 @@ export class SchemaConverter {
     } else if (schema.oneOf || schema.anyOf) {
       return this._addRule(ruleName, this._generateUnionRule(name, schema.oneOf || schema.anyOf));
     } else if (Array.isArray(schemaType)) {
-      return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({ type: t }))));
+      return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({...schema, type: t}))));
     } else if ('const' in schema) {
       return this._addRule(ruleName, this._generateConstantRule(schema.const) + ' space');
     } else if ('enum' in schema) {
diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp
index 23ef8324c1327..0e21dc7959943 100644
--- a/tests/test-grammar-integration.cpp
+++ b/tests/test-grammar-integration.cpp
@@ -993,6 +993,31 @@ static void test_json_schema() {
         }
     );

+    test_schema(
+        "",
+        // Schema
+        R"""(
+            {
+                "type": ["array", "null"],
+                "items": { "type": "string" }
+            }
+        )""",
+        // Passing strings
+        {
+            "null",
+            "[]",
+            "[\"123\"]",
+            "[\"foo\", \"bar\"]",
+        },
+        // Failing strings
+        {
+            "",
+            "[123]",
+            "\"foo\"",
+            "[\"foo\", 42]",
+        }
+    );
+
     test_schema(
         "min+max items",
         // Schema
diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp
index 1e69cb6ef36be..3aaa11833de57 100755
--- a/tests/test-json-schema-to-grammar.cpp
+++ b/tests/test-json-schema-to-grammar.cpp
@@ -502,6 +502,38 @@ static void test_all(const std::string & lang, std::function