From c495df854291c9c9110c8432904528194b671fbd Mon Sep 17 00:00:00 2001 From: jhen Date: Sun, 28 Jul 2024 13:30:38 +0800 Subject: [PATCH 1/6] feat: add isChatTemplateSupported in model info --- android/src/main/jni.cpp | 11 +++++++ cpp/rn-llama.hpp | 8 +++++ example/ios/.xcode.env.local | 2 +- example/ios/Podfile.lock | 4 +-- example/package.json | 1 + example/src/App.tsx | 63 ++++++++++++++++++++++++------------ example/yarn.lock | 2 +- ios/RNLlama.mm | 7 +--- ios/RNLlamaContext.h | 9 +----- ios/RNLlamaContext.mm | 52 ++++++++++++----------------- 10 files changed, 89 insertions(+), 70 deletions(-) diff --git a/android/src/main/jni.cpp b/android/src/main/jni.cpp index 6a9b7e9..5278817 100644 --- a/android/src/main/jni.cpp +++ b/android/src/main/jni.cpp @@ -62,6 +62,16 @@ static inline void putDouble(JNIEnv *env, jobject map, const char *key, double v env->CallVoidMethod(map, putDoubleMethod, jKey, value); } +// Method to put boolean into WritableMap +static inline void putBoolean(JNIEnv *env, jobject map, const char *key, bool value) { + jclass mapClass = env->FindClass("com/facebook/react/bridge/WritableMap"); + jmethodID putBooleanMethod = env->GetMethodID(mapClass, "putBoolean", "(Ljava/lang/String;Z)V"); + + jstring jKey = env->NewStringUTF(key); + + env->CallVoidMethod(map, putBooleanMethod, jKey, value); +} + // Method to put WriteableMap into WritableMap static inline void putMap(JNIEnv *env, jobject map, const char *key, jobject value) { jclass mapClass = env->FindClass("com/facebook/react/bridge/WritableMap"); @@ -208,6 +218,7 @@ Java_com_rnllama_LlamaContext_loadModelDetails( putString(env, result, "desc", desc); putDouble(env, result, "size", llama_model_size(llama->model)); putDouble(env, result, "nParams", llama_model_n_params(llama->model)); + putBoolean(env, result, "isChatTemplateSupported", llama->validateModelChatTemplate()); putMap(env, result, "metadata", meta); return reinterpret_cast(result); diff --git a/cpp/rn-llama.hpp b/cpp/rn-llama.hpp index 69c3bdc..1d3bed2 100644 --- a/cpp/rn-llama.hpp +++ b/cpp/rn-llama.hpp @@ -229,6 +229,14 @@ struct llama_rn_context return true; } + bool validateModelChatTemplate() const { + llama_chat_message chat[] = {{"user", "test"}}; + + const int res = llama_chat_apply_template(model, nullptr, chat, 1, true, nullptr, 0); + + return res > 0; + } + void truncatePrompt(std::vector &prompt_tokens) { const int n_left = n_ctx - params.n_keep; const int n_block_size = n_left / 2; diff --git a/example/ios/.xcode.env.local b/example/ios/.xcode.env.local index 92bcef1..51de392 100644 --- a/example/ios/.xcode.env.local +++ b/example/ios/.xcode.env.local @@ -1 +1 @@ -export NODE_BINARY=/var/folders/4z/1d45cfts3936kdm7v9jl349r0000gn/T/yarn--1722061680584-0.19771203690487615/node +export NODE_BINARY=/var/folders/4z/1d45cfts3936kdm7v9jl349r0000gn/T/yarn--1722073570606-0.6759511337227031/node diff --git a/example/ios/Podfile.lock b/example/ios/Podfile.lock index e5e2c51..5f26ad0 100644 --- a/example/ios/Podfile.lock +++ b/example/ios/Podfile.lock @@ -8,7 +8,7 @@ PODS: - hermes-engine/Pre-built (= 0.72.3) - hermes-engine/Pre-built (0.72.3) - libevent (2.1.12) - - llama-rn (0.3.4): + - llama-rn (0.3.5): - RCT-Folly - RCTRequired - RCTTypeSafety @@ -1261,7 +1261,7 @@ SPEC CHECKSUMS: glog: 04b94705f318337d7ead9e6d17c019bd9b1f6b1b hermes-engine: 10fbd3f62405c41ea07e71973ea61e1878d07322 libevent: 4049cae6c81cdb3654a443be001fb9bdceff7913 - llama-rn: 1facf2ce116e23e89a526e30439f151eb03f460d + llama-rn: 1ab4e3bae3136c83dcc2bdcea1ddf0c861335d78 RCT-Folly: 
424b8c9a7a0b9ab2886ffe9c3b041ef628fd4fb1 RCTRequired: a2faf4bad4e438ca37b2040cb8f7799baa065c18 RCTTypeSafety: cb09f3e4747b6d18331a15eb05271de7441ca0b3 diff --git a/example/package.json b/example/package.json index 731d770..5b69c31 100644 --- a/example/package.json +++ b/example/package.json @@ -12,6 +12,7 @@ "dependencies": { "@flyerhq/react-native-chat-ui": "^1.4.3", "@react-native-clipboard/clipboard": "^1.13.1", + "json5": "^2.2.3", "react": "18.2.0", "react-native": "0.72.3", "react-native-blob-util": "^0.19.1", diff --git a/example/src/App.tsx b/example/src/App.tsx index 75eef22..fe5bb3d 100644 --- a/example/src/App.tsx +++ b/example/src/App.tsx @@ -6,6 +6,7 @@ import DocumentPicker from 'react-native-document-picker' import type { DocumentPickerResponse } from 'react-native-document-picker' import { Chat, darkTheme } from '@flyerhq/react-native-chat-ui' import type { MessageType } from '@flyerhq/react-native-chat-ui' +import json5 from 'json5' import ReactNativeBlobUtil from 'react-native-blob-util' // eslint-disable-next-line import/no-unresolved import { initLlama, LlamaContext, convertJsonSchemaToGrammar } from 'llama.rn' @@ -73,7 +74,7 @@ export default function App() { } } - const addSystemMessage = (text: string, metadata = {} ) => { + const addSystemMessage = (text: string, metadata = {}) => { const textMessage: MessageType.Text = { author: system, createdAt: Date.now(), @@ -119,7 +120,7 @@ export default function App() { '- /release: release the context\n' + '- /stop: stop the current completion\n' + '- /reset: reset the conversation', - '- /save-session: save the session tokens\n' + + '- /save-session: save the session tokens\n' + '- /load-session: load the session tokens', ) }) @@ -166,12 +167,18 @@ export default function App() { const handleSendPress = async (message: MessageType.PartialText) => { if (context) { switch (message.text) { + case '/info': + addSystemMessage( + `// Model Info\n${json5.stringify(context.model, null, 2)}`, + { copyable: true }, + ) + return case '/bench': addSystemMessage('Heating up the model...') const t0 = Date.now() await context.bench(8, 4, 1, 1) const tHeat = Date.now() - t0 - if (tHeat > 1E4) { + if (tHeat > 1e4) { addSystemMessage('Heat up time is too long, please try again.') return } @@ -186,15 +193,21 @@ export default function App() { ppStd, tgAvg, tgStd, - } = await context.bench(512, 128, 1, 3) + } = await context.bench(512, 128, 1, 3) - const size = `${(modelSize / 1024.0 / 1024.0 / 1024.0).toFixed(2)} GiB` + const size = `${(modelSize / 1024.0 / 1024.0 / 1024.0).toFixed( + 2, + )} GiB` const nParams = `${(modelNParams / 1e9).toFixed(2)}B` const md = '| model | size | params | test | t/s |\n' + '| --- | --- | --- | --- | --- |\n' + - `| ${modelDesc} | ${size} | ${nParams} | pp 512 | ${ppAvg.toFixed(2)} ± ${ppStd.toFixed(2)} |\n` + - `| ${modelDesc} | ${size} | ${nParams} | tg 128 | ${tgAvg.toFixed(2)} ± ${tgStd.toFixed(2)}` + `| ${modelDesc} | ${size} | ${nParams} | pp 512 | ${ppAvg.toFixed( + 2, + )} ± ${ppStd.toFixed(2)} |\n` + + `| ${modelDesc} | ${size} | ${nParams} | tg 128 | ${tgAvg.toFixed( + 2, + )} ± ${tgStd.toFixed(2)}` addSystemMessage(md, { copyable: true }) return case '/release': @@ -208,22 +221,30 @@ export default function App() { addSystemMessage('Conversation reset!') return case '/save-session': - context.saveSession(`${dirs.DocumentDir}/llama-session.bin`).then(tokensSaved => { - console.log('Session tokens saved:', tokensSaved) - addSystemMessage(`Session saved! 
${tokensSaved} tokens saved.`) - }).catch(e => { - console.log('Session save failed:', e) - addSystemMessage(`Session save failed: ${e.message}`) - }) + context + .saveSession(`${dirs.DocumentDir}/llama-session.bin`) + .then((tokensSaved) => { + console.log('Session tokens saved:', tokensSaved) + addSystemMessage(`Session saved! ${tokensSaved} tokens saved.`) + }) + .catch((e) => { + console.log('Session save failed:', e) + addSystemMessage(`Session save failed: ${e.message}`) + }) return case '/load-session': - context.loadSession(`${dirs.DocumentDir}/llama-session.bin`).then(details => { - console.log('Session loaded:', details) - addSystemMessage(`Session loaded! ${details.tokens_loaded} tokens loaded.`) - }).catch(e => { - console.log('Session load failed:', e) - addSystemMessage(`Session load failed: ${e.message}`) - }) + context + .loadSession(`${dirs.DocumentDir}/llama-session.bin`) + .then((details) => { + console.log('Session loaded:', details) + addSystemMessage( + `Session loaded! ${details.tokens_loaded} tokens loaded.`, + ) + }) + .catch((e) => { + console.log('Session load failed:', e) + addSystemMessage(`Session load failed: ${e.message}`) + }) return } } diff --git a/example/yarn.lock b/example/yarn.lock index 204708e..0ddcea9 100644 --- a/example/yarn.lock +++ b/example/yarn.lock @@ -3523,7 +3523,7 @@ json-stable-stringify@^1.0.2: dependencies: jsonify "^0.0.1" -json5@^2.1.1, json5@^2.2.2: +json5@^2.1.1, json5@^2.2.2, json5@^2.2.3: version "2.2.3" resolved "https://registry.yarnpkg.com/json5/-/json5-2.2.3.tgz#78cd6f1a19bdc12b73db5ad0c61efd66c1e29283" integrity sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg== diff --git a/ios/RNLlama.mm b/ios/RNLlama.mm index d96441e..b8ce456 100644 --- a/ios/RNLlama.mm +++ b/ios/RNLlama.mm @@ -53,12 +53,7 @@ @implementation RNLlama @"contextId": contextIdNumber, @"gpu": @([context isMetalEnabled]), @"reasonNoGPU": [context reasonNoMetal], - @"model": @{ - @"desc": [context modelDesc], - @"size": @([context modelSize]), - @"nParams": @([context modelNParams]), - @"metadata": [context metadata], - } + @"model": [context modelInfo], }); } diff --git a/ios/RNLlamaContext.h b/ios/RNLlamaContext.h index 7e07c96..b772345 100644 --- a/ios/RNLlamaContext.h +++ b/ios/RNLlamaContext.h @@ -8,10 +8,6 @@ bool is_metal_enabled; NSString * reason_no_metal; bool is_model_loaded; - NSString * model_desc; - uint64_t model_size; - uint64_t model_n_params; - NSDictionary * metadata; rnllama::llama_rn_context * llama; } @@ -19,10 +15,7 @@ + (instancetype)initWithParams:(NSDictionary *)params; - (bool)isMetalEnabled; - (NSString *)reasonNoMetal; -- (NSDictionary *)metadata; -- (NSString *)modelDesc; -- (uint64_t)modelSize; -- (uint64_t)modelNParams; +- (NSDictionary *)modelInfo; - (bool)isModelLoaded; - (bool)isPredicting; - (NSDictionary *)completion:(NSDictionary *)params onToken:(void (^)(NSMutableDictionary *tokenResult))onToken; diff --git a/ios/RNLlamaContext.mm b/ios/RNLlamaContext.mm index 87d6f3c..d5f2492 100644 --- a/ios/RNLlamaContext.mm +++ b/ios/RNLlamaContext.mm @@ -82,26 +82,6 @@ + (instancetype)initWithParams:(NSDictionary *)params { context->is_metal_enabled = isMetalEnabled; context->reason_no_metal = reasonNoMetal; - int count = llama_model_meta_count(context->llama->model); - NSDictionary *meta = [[NSMutableDictionary alloc] init]; - for (int i = 0; i < count; i++) { - char key[256]; - llama_model_meta_key_by_index(context->llama->model, i, key, sizeof(key)); - char val[256]; - 
llama_model_meta_val_str_by_index(context->llama->model, i, val, sizeof(val)); - - NSString *keyStr = [NSString stringWithUTF8String:key]; - NSString *valStr = [NSString stringWithUTF8String:val]; - [meta setValue:valStr forKey:keyStr]; - } - context->metadata = meta; - - char desc[1024]; - llama_model_desc(context->llama->model, desc, sizeof(desc)); - context->model_desc = [NSString stringWithUTF8String:desc]; - context->model_size = llama_model_size(context->llama->model); - context->model_n_params = llama_model_n_params(context->llama->model); - return context; } @@ -113,20 +93,30 @@ - (NSString *)reasonNoMetal { return reason_no_metal; } -- (NSDictionary *)metadata { - return metadata; -} +- (NSDictionary *)modelInfo { + char desc[1024]; + llama_model_desc(llama->model, desc, sizeof(desc)); -- (NSString *)modelDesc { - return model_desc; -} + int count = llama_model_meta_count(llama->model); + NSDictionary *meta = [[NSMutableDictionary alloc] init]; + for (int i = 0; i < count; i++) { + char key[256]; + llama_model_meta_key_by_index(llama->model, i, key, sizeof(key)); + char val[256]; + llama_model_meta_val_str_by_index(llama->model, i, val, sizeof(val)); -- (uint64_t)modelSize { - return model_size; -} + NSString *keyStr = [NSString stringWithUTF8String:key]; + NSString *valStr = [NSString stringWithUTF8String:val]; + [meta setValue:valStr forKey:keyStr]; + } -- (uint64_t)modelNParams { - return model_n_params; + return @{ + @"desc": [NSString stringWithUTF8String:desc], + @"size": @(llama_model_size(llama->model)), + @"nParams": @(llama_model_n_params(llama->model)), + @"isChatTemplateSupported": @(llama->validateModelChatTemplate()), + @"metadata": meta + }; } - (bool)isModelLoaded { From a75662cf84dd1f9073263d0d0708c14c7753adca Mon Sep 17 00:00:00 2001 From: jhen Date: Sun, 28 Jul 2024 13:50:39 +0800 Subject: [PATCH 2/6] feat(ts): add formatChat util --- src/__tests__/chat.test.ts | 61 ++++++++++++++++++++++++++++++++++++++ src/chat.ts | 47 +++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 src/__tests__/chat.test.ts create mode 100644 src/chat.ts diff --git a/src/__tests__/chat.test.ts b/src/__tests__/chat.test.ts new file mode 100644 index 0000000..9e78c0a --- /dev/null +++ b/src/__tests__/chat.test.ts @@ -0,0 +1,61 @@ +import { formatChat } from '../chat' + +describe('formatChat', () => { + it('should format chat messages', () => { + const messages = [ + { + role: 'user', + content: 'Hello, world!', + }, + { + role: 'bot', + content: [ + { + text: 'Hello, user!', + }, + { + text: 'How are you?', + }, + ], + }, + ] + + const expected = [ + { + role: 'user', + content: 'Hello, world!', + }, + { + role: 'bot', + content: 'Hello, user!\nHow are you?', + }, + ] + + expect(formatChat(messages)).toEqual(expected) + }) + + it('should throw an error if the content is missing', () => { + const messages = [ + { + role: 'user', + }, + ] + + expect(() => formatChat(messages)).toThrowError( + "Missing 'content' (ref: https://github.com/ggerganov/llama.cpp/issues/8367)", + ) + }) + + it('should throw an error if the content type is invalid', () => { + const messages = [ + { + role: 'user', + content: 42, + }, + ] + + expect(() => formatChat(messages)).toThrowError( + "Invalid 'content' type (ref: https://github.com/ggerganov/llama.cpp/issues/8367)", + ) + }) +}) diff --git a/src/chat.ts b/src/chat.ts new file mode 100644 index 0000000..9f18d00 --- /dev/null +++ b/src/chat.ts @@ -0,0 +1,47 @@ +export type RNLlamaMessagePart = { + text?: string +} + +export type 
RNLlamaOAICompatibleMessage = { + role: string + content?: string | RNLlamaMessagePart[] | any // any for check invalid content type +} + +export type RNLlamaChatMessage = { + role: string + content: string +} + +export function formatChat( + messages: RNLlamaOAICompatibleMessage[], +): RNLlamaChatMessage[] { + const chat: RNLlamaChatMessage[] = [] + + messages.forEach((currMsg) => { + const role: string = currMsg.role || '' + + let content: string = '' + if ('content' in currMsg) { + if (typeof currMsg.content === 'string') { + ;({ content } = currMsg) + } else if (Array.isArray(currMsg.content)) { + currMsg.content.forEach((part) => { + if ('text' in part) { + content += `${content ? '\n' : ''}${part.text}` + } + }) + } else { + throw new TypeError( + "Invalid 'content' type (ref: https://github.com/ggerganov/llama.cpp/issues/8367)", + ) + } + } else { + throw new Error( + "Missing 'content' (ref: https://github.com/ggerganov/llama.cpp/issues/8367)", + ) + } + + chat.push({ role, content }) + }) + return chat +} From 07c4ef7072daa61fb6eeb790450ceaa471a03fd9 Mon Sep 17 00:00:00 2001 From: jhen Date: Sun, 28 Jul 2024 14:36:46 +0800 Subject: [PATCH 3/6] feat(ts): add getFormattedChat native method --- .../main/java/com/rnllama/LlamaContext.java | 13 +++ .../src/main/java/com/rnllama/RNLlama.java | 32 ++++++++ android/src/main/jni.cpp | 40 ++++++++++ .../java/com/rnllama/RNLlamaModule.java | 5 ++ .../java/com/rnllama/RNLlamaModule.java | 5 ++ ios/RNLlama.mm | 14 ++++ ios/RNLlamaContext.h | 1 + ios/RNLlamaContext.mm | 14 ++++ src/NativeRNLlama.ts | 62 ++++++++++----- src/chat.ts | 11 +-- src/index.ts | 79 ++++++++++++------- 11 files changed, 221 insertions(+), 55 deletions(-) diff --git a/android/src/main/java/com/rnllama/LlamaContext.java b/android/src/main/java/com/rnllama/LlamaContext.java index 157419a..55d90f3 100644 --- a/android/src/main/java/com/rnllama/LlamaContext.java +++ b/android/src/main/java/com/rnllama/LlamaContext.java @@ -75,6 +75,14 @@ public WritableMap getModelDetails() { return modelDetails; } + public String getFormattedChat(ReadableArray messages, String chatTemplate) { + ReadableMap[] msgs = new ReadableMap[messages.size()]; + for (int i = 0; i < messages.size(); i++) { + msgs[i] = messages.getMap(i); + } + return getFormattedChat(this.context, msgs, chatTemplate == null ? "" : chatTemplate); + } + private void emitPartialCompletion(WritableMap tokenResult) { WritableMap event = Arguments.createMap(); event.putInt("contextId", LlamaContext.this.id); @@ -316,6 +324,11 @@ protected static native long initContext( protected static native WritableMap loadModelDetails( long contextPtr ); + protected static native String getFormattedChat( + long contextPtr, + ReadableMap[] messages, + String chatTemplate + ); protected static native WritableMap loadSession( long contextPtr, String path diff --git a/android/src/main/java/com/rnllama/RNLlama.java b/android/src/main/java/com/rnllama/RNLlama.java index 430eae7..ac96eb2 100644 --- a/android/src/main/java/com/rnllama/RNLlama.java +++ b/android/src/main/java/com/rnllama/RNLlama.java @@ -80,6 +80,38 @@ protected void onPostExecute(WritableMap result) { tasks.put(task, "initContext"); } + public void getFormattedChat(double id, final ReadableArray messages, final String chatTemplate, Promise promise) { + final int contextId = (int) id; + AsyncTask task = new AsyncTask() { + private Exception exception; + + @Override + protected String doInBackground(Void... 
voids) { + try { + LlamaContext context = contexts.get(contextId); + if (context == null) { + throw new Exception("Context not found"); + } + return context.getFormattedChat(messages, chatTemplate); + } catch (Exception e) { + exception = e; + return null; + } + } + + @Override + protected void onPostExecute(String result) { + if (exception != null) { + promise.reject(exception); + return; + } + promise.resolve(result); + tasks.remove(this); + } + }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR); + tasks.put(task, "getFormattedChat-" + contextId); + } + public void loadSession(double id, final String path, Promise promise) { final int contextId = (int) id; AsyncTask task = new AsyncTask() { diff --git a/android/src/main/jni.cpp b/android/src/main/jni.cpp index 5278817..7078c9b 100644 --- a/android/src/main/jni.cpp +++ b/android/src/main/jni.cpp @@ -224,6 +224,46 @@ Java_com_rnllama_LlamaContext_loadModelDetails( return reinterpret_cast(result); } +JNIEXPORT jobject JNICALL +Java_com_rnllama_LlamaContext_getFormattedChat( + JNIEnv *env, + jobject thiz, + jlong context_ptr, + jobjectArray messages, + jstring chat_template +) { + UNUSED(thiz); + auto llama = context_map[(long) context_ptr]; + + std::vector chat; + + int messages_len = env->GetArrayLength(messages); + for (int i = 0; i < messages_len; i++) { + jobject msg = env->GetObjectArrayElement(messages, i); + jclass msgClass = env->GetObjectClass(msg); + + jmethodID getRoleMethod = env->GetMethodID(msgClass, "getString", "(Ljava/lang/String;)Ljava/lang/String;"); + jstring roleKey = env->NewStringUTF("role"); + jstring contentKey = env->NewStringUTF("content"); + + jstring role_str = (jstring) env->CallObjectMethod(msg, getRoleMethod, roleKey); + jstring content_str = (jstring) env->CallObjectMethod(msg, getRoleMethod, contentKey); + + const char *role = env->GetStringUTFChars(role_str, nullptr); + const char *content = env->GetStringUTFChars(content_str, nullptr); + + chat.push_back({ role, content }); + + env->ReleaseStringUTFChars(role_str, role); + env->ReleaseStringUTFChars(content_str, content); + } + + const char *tmpl_chars = env->GetStringUTFChars(chat_template, nullptr); + std::string formatted_chat = llama_chat_apply_template(llama->model, tmpl_chars, chat, true); + + return env->NewStringUTF(formatted_chat.c_str()); +} + JNIEXPORT jobject JNICALL Java_com_rnllama_LlamaContext_loadSession( JNIEnv *env, diff --git a/android/src/newarch/java/com/rnllama/RNLlamaModule.java b/android/src/newarch/java/com/rnllama/RNLlamaModule.java index 93d2722..7527c0f 100644 --- a/android/src/newarch/java/com/rnllama/RNLlamaModule.java +++ b/android/src/newarch/java/com/rnllama/RNLlamaModule.java @@ -42,6 +42,11 @@ public void initContext(final ReadableMap params, final Promise promise) { rnllama.initContext(params, promise); } + @ReactMethod + public void getFormattedChat(double id, ReadableArray messages, String chatTemplate, Promise promise) { + rnllama.getFormattedChat(id, messages, chatTemplate, promise); + } + @ReactMethod public void loadSession(double id, String path, Promise promise) { rnllama.loadSession(id, path, promise); diff --git a/android/src/oldarch/java/com/rnllama/RNLlamaModule.java b/android/src/oldarch/java/com/rnllama/RNLlamaModule.java index 814fb17..4e6cc6f 100644 --- a/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +++ b/android/src/oldarch/java/com/rnllama/RNLlamaModule.java @@ -43,6 +43,11 @@ public void initContext(final ReadableMap params, final Promise promise) { rnllama.initContext(params, promise); } 
+ @ReactMethod + public void getFormattedChat(double id, ReadableArray messages, String chatTemplate, Promise promise) { + rnllama.getFormattedChat(id, messages, chatTemplate, promise); + } + @ReactMethod public void loadSession(double id, String path, Promise promise) { rnllama.loadSession(id, path, promise); diff --git a/ios/RNLlama.mm b/ios/RNLlama.mm index b8ce456..89b37c0 100644 --- a/ios/RNLlama.mm +++ b/ios/RNLlama.mm @@ -57,6 +57,20 @@ @implementation RNLlama }); } +RCT_EXPORT_METHOD(getFormattedChat:(double)contextId + withMessages:(NSArray *)messages + withTemplate:(NSString *)chatTemplate + withResolver:(RCTPromiseResolveBlock)resolve + withRejecter:(RCTPromiseRejectBlock)reject) +{ + RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]]; + if (context == nil) { + reject(@"llama_error", @"Context not found", nil); + return; + } + resolve([context getFormattedChat:messages withTemplate:chatTemplate]); +} + RCT_EXPORT_METHOD(loadSession:(double)contextId withFilePath:(NSString *)filePath withResolver:(RCTPromiseResolveBlock)resolve diff --git a/ios/RNLlamaContext.h b/ios/RNLlamaContext.h index b772345..37a34bb 100644 --- a/ios/RNLlamaContext.h +++ b/ios/RNLlamaContext.h @@ -23,6 +23,7 @@ - (NSArray *)tokenize:(NSString *)text; - (NSString *)detokenize:(NSArray *)tokens; - (NSArray *)embedding:(NSString *)text; +- (NSString *)getFormattedChat:(NSArray *)messages withTemplate:(NSString *)chatTemplate; - (NSDictionary *)loadSession:(NSString *)path; - (int)saveSession:(NSString *)path size:(int)size; - (NSString *)bench:(int)pp tg:(int)tg pl:(int)pl nr:(int)nr; diff --git a/ios/RNLlamaContext.mm b/ios/RNLlamaContext.mm index d5f2492..d478e3c 100644 --- a/ios/RNLlamaContext.mm +++ b/ios/RNLlamaContext.mm @@ -127,6 +127,20 @@ - (bool)isPredicting { return llama->is_predicting; } +- (NSString *)getFormattedChat:(NSArray *)messages withTemplate:(NSString *)chatTemplate { + std::vector chat; + + for (NSDictionary *msg in messages) { + std::string role = [[msg objectForKey:@"role"] UTF8String]; + std::string content = [[msg objectForKey:@"content"] UTF8String]; + chat.push_back({ role, content }); + } + + auto tmpl = chatTemplate == nil ? 
"" : [chatTemplate UTF8String]; + auto formatted_chat = llama_chat_apply_template(llama->model, tmpl, chat, true); + return [NSString stringWithUTF8String:formatted_chat.c_str()]; +} + - (NSArray *)tokenProbsToDict:(std::vector)probs { NSMutableArray *out = [[NSMutableArray alloc] init]; for (const auto &prob : probs) diff --git a/src/NativeRNLlama.ts b/src/NativeRNLlama.ts index 284de67..f2d6882 100644 --- a/src/NativeRNLlama.ts +++ b/src/NativeRNLlama.ts @@ -1,5 +1,5 @@ -import type { TurboModule } from 'react-native'; -import { TurboModuleRegistry } from 'react-native'; +import type { TurboModule } from 'react-native' +import { TurboModuleRegistry } from 'react-native' export type NativeContextParams = { model: string @@ -110,22 +110,48 @@ export type NativeSessionLoadResult = { prompt: string } -export interface Spec extends TurboModule { - setContextLimit(limit: number): Promise; - initContext(params: NativeContextParams): Promise; - - loadSession(contextId: number, filepath: string): Promise; - saveSession(contextId: number, filepath: string, size: number): Promise; - completion(contextId: number, params: NativeCompletionParams): Promise; - stopCompletion(contextId: number): Promise; - tokenize(contextId: number, text: string): Promise; - detokenize(contextId: number, tokens: number[]): Promise; - embedding(contextId: number, text: string): Promise; - bench(contextId: number, pp: number, tg: number, pl: number, nr: number): Promise; - - releaseContext(contextId: number): Promise; +export type NativeLlamaChatMessage = { + role: string + content: string +} - releaseAllContexts(): Promise; +export interface Spec extends TurboModule { + setContextLimit(limit: number): Promise + initContext(params: NativeContextParams): Promise + + getFormattedChat( + contextId: number, + messages: NativeLlamaChatMessage[], + chatTemplate?: string, + ): Promise + loadSession( + contextId: number, + filepath: string, + ): Promise + saveSession( + contextId: number, + filepath: string, + size: number, + ): Promise + completion( + contextId: number, + params: NativeCompletionParams, + ): Promise + stopCompletion(contextId: number): Promise + tokenize(contextId: number, text: string): Promise + detokenize(contextId: number, tokens: number[]): Promise + embedding(contextId: number, text: string): Promise + bench( + contextId: number, + pp: number, + tg: number, + pl: number, + nr: number, + ): Promise + + releaseContext(contextId: number): Promise + + releaseAllContexts(): Promise } -export default TurboModuleRegistry.get('RNLlama') as Spec; +export default TurboModuleRegistry.get('RNLlama') as Spec diff --git a/src/chat.ts b/src/chat.ts index 9f18d00..a88a374 100644 --- a/src/chat.ts +++ b/src/chat.ts @@ -1,3 +1,5 @@ +import type { NativeLlamaChatMessage } from './NativeRNLlama' + export type RNLlamaMessagePart = { text?: string } @@ -7,15 +9,10 @@ export type RNLlamaOAICompatibleMessage = { content?: string | RNLlamaMessagePart[] | any // any for check invalid content type } -export type RNLlamaChatMessage = { - role: string - content: string -} - export function formatChat( messages: RNLlamaOAICompatibleMessage[], -): RNLlamaChatMessage[] { - const chat: RNLlamaChatMessage[] = [] +): NativeLlamaChatMessage[] { + const chat: NativeLlamaChatMessage[] = [] messages.forEach((currMsg) => { const role: string = currMsg.role || '' diff --git a/src/index.ts b/src/index.ts index eabdebb..b6f616d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -12,6 +12,8 @@ import type { NativeSessionLoadResult, } from 
'./NativeRNLlama' import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar' +import type { RNLlamaOAICompatibleMessage } from './chat' +import { formatChat } from './chat' export { SchemaGrammarConverter, convertJsonSchemaToGrammar } @@ -38,7 +40,10 @@ type TokenNativeEvent = { export type ContextParams = NativeContextParams -export type CompletionParams = Omit +export type CompletionParams = Omit< + NativeCompletionParams, + 'emit_partial_completion' +> export type BenchResult = { modelDesc: string @@ -57,14 +62,11 @@ export class LlamaContext { reasonNoGPU: string = '' - model: Object = {} + model: { + isChatTemplateSupported?: boolean + } = {} - constructor({ - contextId, - gpu, - reasonNoGPU, - model, - }: NativeLlamaContext) { + constructor({ contextId, gpu, reasonNoGPU, model }: NativeLlamaContext) { this.id = contextId this.gpu = gpu this.reasonNoGPU = reasonNoGPU @@ -83,22 +85,37 @@ export class LlamaContext { /** * Save current cached prompt & completion state to a file. */ - async saveSession(filepath: string, options?: { tokenSize: number }): Promise { + async saveSession( + filepath: string, + options?: { tokenSize: number }, + ): Promise { return RNLlama.saveSession(this.id, filepath, options?.tokenSize || -1) } + async getFormattedChat( + messages: RNLlamaOAICompatibleMessage[], + ): Promise { + const chat = formatChat(messages) + return RNLlama.getFormattedChat( + this.id, + chat, + this.model?.isChatTemplateSupported ? undefined : 'chatml', + ) + } + + // async chatCompletion() {} // TODO + async completion( params: CompletionParams, callback?: (data: TokenData) => void, ): Promise { - let tokenListener: any = callback && EventEmitter.addListener( - EVENT_ON_TOKEN, - (evt: TokenNativeEvent) => { + let tokenListener: any = + callback && + EventEmitter.addListener(EVENT_ON_TOKEN, (evt: TokenNativeEvent) => { const { contextId, tokenResult } = evt if (contextId !== this.id) return callback(tokenResult) - }, - ) + }) const promise = RNLlama.completion(this.id, { ...params, emit_partial_completion: !!callback, @@ -132,17 +149,15 @@ export class LlamaContext { return RNLlama.embedding(this.id, text) } - async bench(pp: number, tg: number, pl: number, nr: number): Promise { + async bench( + pp: number, + tg: number, + pl: number, + nr: number, + ): Promise { const result = await RNLlama.bench(this.id, pp, tg, pl, nr) - const [ - modelDesc, - modelSize, - modelNParams, - ppAvg, - ppStd, - tgAvg, - tgStd, - ] = JSON.parse(result) + const [modelDesc, modelSize, modelNParams, ppAvg, ppStd, tgAvg, tgStd] = + JSON.parse(result) return { modelDesc, modelSize, @@ -170,12 +185,16 @@ export async function initLlama({ }: ContextParams): Promise { let path = model if (path.startsWith('file://')) path = path.slice(7) - const { contextId, gpu, reasonNoGPU, model: modelDetails } = - await RNLlama.initContext({ - model: path, - is_model_asset: !!isModelAsset, - ...rest, - }) + const { + contextId, + gpu, + reasonNoGPU, + model: modelDetails, + } = await RNLlama.initContext({ + model: path, + is_model_asset: !!isModelAsset, + ...rest, + }) return new LlamaContext({ contextId, gpu, reasonNoGPU, model: modelDetails }) } From 944f508dc116250cdf48ed3f976f49c299740595 Mon Sep 17 00:00:00 2001 From: jhen Date: Sun, 28 Jul 2024 14:57:58 +0800 Subject: [PATCH 4/6] feat(ts): completion: add messages --- src/index.ts | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/index.ts b/src/index.ts index b6f616d..c0552a7 100644 --- a/src/index.ts +++ 
b/src/index.ts @@ -43,7 +43,9 @@ export type ContextParams = NativeContextParams export type CompletionParams = Omit< NativeCompletionParams, 'emit_partial_completion' -> +> & { + messages?: RNLlamaOAICompatibleMessage[] +} export type BenchResult = { modelDesc: string @@ -103,12 +105,17 @@ export class LlamaContext { ) } - // async chatCompletion() {} // TODO - async completion( params: CompletionParams, callback?: (data: TokenData) => void, ): Promise { + + let finalPrompt = params.prompt + if (params.messages) { // messages always win + finalPrompt = await this.getFormattedChat(params.messages) + console.log(finalPrompt) + } + let tokenListener: any = callback && EventEmitter.addListener(EVENT_ON_TOKEN, (evt: TokenNativeEvent) => { @@ -116,8 +123,10 @@ export class LlamaContext { if (contextId !== this.id) return callback(tokenResult) }) + const promise = RNLlama.completion(this.id, { ...params, + prompt: finalPrompt, emit_partial_completion: !!callback, }) return promise From ad7e0a58004eb9020765d6e4b9ca44016378b337 Mon Sep 17 00:00:00 2001 From: jhen Date: Sun, 28 Jul 2024 14:58:17 +0800 Subject: [PATCH 5/6] feat(example): use messages --- example/src/App.tsx | 89 +++++++++++++++++++++++++-------------------- src/index.ts | 5 ++- 2 files changed, 53 insertions(+), 41 deletions(-) diff --git a/example/src/App.tsx b/example/src/App.tsx index fe5bb3d..8be06c5 100644 --- a/example/src/App.tsx +++ b/example/src/App.tsx @@ -21,30 +21,10 @@ const user = { id: 'y9d7f8pgn' } const systemId = 'h3o3lc5xj' const system = { id: systemId } -const initialChatPrompt = - 'This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.\n\n' - -const generateChatPrompt = ( - context: LlamaContext | undefined, - conversationId: string, - messages: MessageType.Any[], -) => { - const prompt = [...messages] - .reverse() - .map((msg) => { - if ( - !msg.metadata?.system && - msg.metadata?.conversationId === conversationId && - msg.metadata?.contextId === context?.id && - msg.type === 'text' - ) { - return `${msg.author.id === systemId ? 'llama' : 'User'}: ${msg.text}` - } - return '' - }) - .filter(Boolean) - .join('\n') - return initialChatPrompt + prompt +const systemMessage = { + role: 'system', + content: + 'This is a conversation between user and assistant, a friendly chatbot.\n\n', } const defaultConversationId = 'default' @@ -259,32 +239,50 @@ export default function App() { conversationId: conversationIdRef.current, }, } - addMessage(textMessage) - setInferencing(true) const id = randId() const createdAt = Date.now() - let prompt = generateChatPrompt(context, conversationIdRef.current, [ - textMessage, - ...messages, - ]) - prompt += `\nllama:` + const msgs = [ + systemMessage, + ...[...messages] + .reverse() + .map((msg) => { + if ( + !msg.metadata?.system && + msg.metadata?.conversationId === conversationIdRef.current && + msg.metadata?.contextId === context?.id && + msg.type === 'text' + ) { + return { + role: msg.author.id === systemId ? 
'assistant' : 'user', + content: msg.text, + } + } + return { role: '', content: '' } + }) + .filter((msg) => msg.role), + { role: 'user', content: message.text }, + ] + addMessage(textMessage) + setInferencing(true) + // Test area { // Test tokenize + const formattedChat = (await context?.getFormattedChat(msgs)) || '' const t0 = Date.now() - const { tokens } = (await context?.tokenize(prompt)) || {} + const { tokens } = (await context?.tokenize(formattedChat)) || {} const t1 = Date.now() console.log( - 'Prompt:', - prompt, + 'Formatted:', + `"${formattedChat}"`, '\nTokenize:', tokens, `(${tokens?.length} tokens, ${t1 - t0}ms})`, ) // Test embedding - // await context?.embedding(prompt).then((result) => { + // await context?.embedding(formattedChat).then((result) => { // console.log('Embedding:', result) // }) @@ -342,7 +340,7 @@ export default function App() { context ?.completion( { - prompt, + messages: msgs, n_predict: 400, temperature: 0.7, top_k: 40, // <= 0 to use vocab size @@ -357,9 +355,19 @@ export default function App() { mirostat_tau: 5, // target entropy mirostat_eta: 0.1, // learning rate penalize_nl: false, // penalize newlines - seed: 1234, // random seed + seed: -1, // random seed n_probs: 0, // Show probabilities - stop: ['', 'llama:', 'User:'], + stop: [ + '', + '<|end|>', + '<|eot_id|>', + '<|end_of_text|>', + '<|im_end|>', + '<|EOT|>', + '<|END_OF_TURN_TOKEN|>', + '<|end_of_turn|>', + '<|endoftext|>', + ], grammar, // n_threads: 4, // logit_bias: [[15043,1.0]], @@ -386,7 +394,10 @@ export default function App() { id, text: token, type: 'text', - metadata: { contextId: context?.id }, + metadata: { + contextId: context?.id, + conversationId: conversationIdRef.current, + }, }, ...msgs, ] diff --git a/src/index.ts b/src/index.ts index c0552a7..151dd18 100644 --- a/src/index.ts +++ b/src/index.ts @@ -42,8 +42,9 @@ export type ContextParams = NativeContextParams export type CompletionParams = Omit< NativeCompletionParams, - 'emit_partial_completion' + 'emit_partial_completion' | 'prompt' > & { + prompt?: string messages?: RNLlamaOAICompatibleMessage[] } @@ -113,7 +114,6 @@ export class LlamaContext { let finalPrompt = params.prompt if (params.messages) { // messages always win finalPrompt = await this.getFormattedChat(params.messages) - console.log(finalPrompt) } let tokenListener: any = @@ -124,6 +124,7 @@ export class LlamaContext { callback(tokenResult) }) + if (!finalPrompt) throw new Error('Prompt is required') const promise = RNLlama.completion(this.id, { ...params, prompt: finalPrompt, From 23d8f0390e17d6f815bba077618eb139b9b1130d Mon Sep 17 00:00:00 2001 From: jhen Date: Sun, 28 Jul 2024 16:06:36 +0800 Subject: [PATCH 6/6] feat(docs): update --- README.md | 114 ++++++++++++++------- docs/API/README.md | 18 ++-- docs/API/classes/LlamaContext.md | 55 +++++++--- docs/API/classes/SchemaGrammarConverter.md | 32 +++--- 4 files changed, 141 insertions(+), 78 deletions(-) diff --git a/README.md b/README.md index ccf3017..175c2fc 100644 --- a/README.md +++ b/README.md @@ -34,10 +34,12 @@ You can search HuggingFace for available models (Keyword: [`GGUF`](https://huggi For create a GGUF model manually, for example in Llama 2: Download the Llama 2 model + 1. Request access from [here](https://ai.meta.com/llama) 2. 
Download the model from HuggingFace [here](https://huggingface.co/meta-llama/Llama-2-7b-chat) (`Llama-2-7b-chat`) Convert the model to ggml format + ```bash # Start with submodule in this repo (or you can clone the repo https://github.com/ggerganov/llama.cpp.git) yarn && yarn bootstrap @@ -76,26 +78,53 @@ const context = await initLlama({ // embedding: true, // use embedding }) -// Do completion -const { text, timings } = await context.completion( +const stopWords = ['', '<|end|>', '<|eot_id|>', '<|end_of_text|>', '<|im_end|>', '<|EOT|>', '<|END_OF_TURN_TOKEN|>', '<|end_of_turn|>', '<|endoftext|>'] + +// Do chat completion +const msgResult = await context.completion( + { + messages: [ + { + role: 'system', + content: 'This is a conversation between user and assistant, a friendly chatbot.', + }, + { + role: 'user', + content: 'Hello!', + }, + ], + n_predict: 100, + stop: stopWords, + // ...other params + }, + (data) => { + // This is a partial completion callback + const { token } = data + }, +) +console.log('Result:', msgResult.text) +console.log('Timings:', msgResult.timings) + +// Or do text completion +const textResult = await context.completion( { prompt: 'This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.\n\nUser: Hello!\nLlama:', n_predict: 100, - stop: ['', 'Llama:', 'User:'], - // n_threads: 4, + stop: [...stopWords, 'Llama:', 'User:'], + // ...other params }, (data) => { // This is a partial completion callback const { token } = data }, ) -console.log('Result:', text) -console.log('Timings:', timings) +console.log('Result:', textResult.text) +console.log('Timings:', textResult.timings) ``` The binding’s deisgn inspired by [server.cpp](https://github.com/ggerganov/llama.cpp/tree/master/examples/server) example in llama.cpp, so you can map its API to LlamaContext: -- `/completion`: `context.completion(params, partialCompletionCallback)` +- `/completion` and `/chat/completions`: `context.completion(params, partialCompletionCallback)` - `/tokenize`: `context.tokenize(content)` - `/detokenize`: `context.detokenize(tokens)` - `/embedding`: `context.embedding(content)` @@ -110,6 +139,7 @@ Please visit the [Documentation](docs/API) for more details. You can also visit the [example](example) to see how to use it. Run the example: + ```bash yarn && yarn bootstrap @@ -142,7 +172,9 @@ You can see [GBNF Guide](https://github.com/ggerganov/llama.cpp/tree/master/gram ```js import { initLlama, convertJsonSchemaToGrammar } from 'llama.rn' -const schema = { /* JSON Schema, see below */ } +const schema = { + /* JSON Schema, see below */ +} const context = await initLlama({ model: 'file://', @@ -153,7 +185,7 @@ const context = await initLlama({ grammar: convertJsonSchemaToGrammar({ schema, propOrder: { function: 0, arguments: 1 }, - }) + }), }) const { text } = await context.completion({ @@ -171,80 +203,81 @@ console.log('Result:', text) { oneOf: [ { - type: "object", - name: "get_current_weather", - description: "Get the current weather in a given location", + type: 'object', + name: 'get_current_weather', + description: 'Get the current weather in a given location', properties: { function: { - const: "get_current_weather", + const: 'get_current_weather', }, arguments: { - type: "object", + type: 'object', properties: { location: { - type: "string", - description: "The city and state, e.g. San Francisco, CA", + type: 'string', + description: 'The city and state, e.g. 
San Francisco, CA', }, unit: { - type: "string", - enum: ["celsius", "fahrenheit"], + type: 'string', + enum: ['celsius', 'fahrenheit'], }, }, - required: ["location"], + required: ['location'], }, }, }, { - type: "object", - name: "create_event", - description: "Create a calendar event", + type: 'object', + name: 'create_event', + description: 'Create a calendar event', properties: { function: { - const: "create_event", + const: 'create_event', }, arguments: { - type: "object", + type: 'object', properties: { title: { - type: "string", - description: "The title of the event", + type: 'string', + description: 'The title of the event', }, date: { - type: "string", - description: "The date of the event", + type: 'string', + description: 'The date of the event', }, time: { - type: "string", - description: "The time of the event", + type: 'string', + description: 'The time of the event', }, }, - required: ["title", "date", "time"], + required: ['title', 'date', 'time'], }, }, }, { - type: "object", - name: "image_search", - description: "Search for an image", + type: 'object', + name: 'image_search', + description: 'Search for an image', properties: { function: { - const: "image_search", + const: 'image_search', }, arguments: { - type: "object", + type: 'object', properties: { query: { - type: "string", - description: "The search query", + type: 'string', + description: 'The search query', }, }, - required: ["query"], + required: ['query'], }, }, }, ], } ``` +
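A minimal sketch of turning a grammar-constrained completion back into a typed function call on the app side. The `parseFunctionCall` helper and the `create_event` handling are illustrative only (not part of the library); `json5` is used because the example app already depends on it, but plain `JSON.parse` also works when the grammar emits strict JSON:

```ts
import json5 from 'json5'

type FunctionCall = {
  function: string
  arguments: Record<string, any>
}

// Illustrative helper: parse the completion text produced under the grammar
// above into a { function, arguments } object, returning null on failure.
const parseFunctionCall = (text: string): FunctionCall | null => {
  try {
    return json5.parse(text) as FunctionCall
  } catch (e) {
    console.warn('Completion did not parse as a function call:', e)
    return null
  }
}

// Usage, assuming `text` comes from context.completion() as shown above:
// const call = parseFunctionCall(text)
// if (call?.function === 'create_event') { /* handle the event arguments */ }
```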
@@ -268,6 +301,7 @@ string ::= "\"" ( 2 ::= "{" space "\"function\"" space ":" space 2-function "," space "\"arguments\"" space ":" space 2-arguments "}" space root ::= 0 | 1 | 2 ``` +
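When `messages` are passed to `completion`, they are first formatted through `getFormattedChat`, which applies the model's own chat template and falls back to the generic `chatml` template whenever the model reports `isChatTemplateSupported: false`. A minimal sketch of calling that path directly, reusing the `context` and `stopWords` values from the usage example above (the message contents are illustrative):

```ts
const messages = [
  { role: 'system', content: 'You are a helpful chatbot.' },
  { role: 'user', content: 'Hello!' },
]

// Render the messages with the model's chat template (or the 'chatml' fallback).
const prompt = await context.getFormattedChat(messages)
console.log('Formatted prompt:', prompt)

// Equivalent to calling completion({ messages, ... }) directly.
const { text } = await context.completion({
  prompt,
  n_predict: 100,
  stop: stopWords,
})
console.log('Reply:', text)
```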
## Mock `llama.rn` @@ -281,12 +315,14 @@ jest.mock('llama.rn', () => require('llama.rn/jest/mock')) ## NOTE iOS: + - The [Extended Virtual Addressing](https://developer.apple.com/documentation/bundleresources/entitlements/com_apple_developer_kernel_extended-virtual-addressing) capability is recommended to enable on iOS project. - Metal: - We have tested to know some devices is not able to use Metal ('params.n_gpu_layers > 0') due to llama.cpp used SIMD-scoped operation, you can check if your device is supported in [Metal feature set tables](https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf), Apple7 GPU will be the minimum requirement. - It's also not supported in iOS simulator due to [this limitation](https://developer.apple.com/documentation/metal/developing_metal_apps_that_run_in_simulator#3241609), we used constant buffers more than 14. Android: + - Currently only supported arm64-v8a / x86_64 platform, this means you can't initialize a context on another platforms. The 64-bit platform are recommended because it can allocate more memory for the model. - No integrated any GPU backend yet. diff --git a/docs/API/README.md b/docs/API/README.md index b998afd..cf9c944 100644 --- a/docs/API/README.md +++ b/docs/API/README.md @@ -43,17 +43,17 @@ llama.rn #### Defined in -[index.ts:43](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L43) +[index.ts:51](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L51) ___ ### CompletionParams -Ƭ **CompletionParams**: `Omit`<`NativeCompletionParams`, ``"emit_partial_completion"``\> +Ƭ **CompletionParams**: `Omit`<`NativeCompletionParams`, ``"emit_partial_completion"`` \| ``"prompt"``\> & { `messages?`: `RNLlamaOAICompatibleMessage`[] ; `prompt?`: `string` } #### Defined in -[index.ts:41](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L41) +[index.ts:43](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L43) ___ @@ -63,7 +63,7 @@ ___ #### Defined in -[index.ts:39](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L39) +[index.ts:41](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L41) ___ @@ -80,7 +80,7 @@ ___ #### Defined in -[index.ts:29](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L29) +[index.ts:31](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L31) ## Functions @@ -104,7 +104,7 @@ ___ #### Defined in -[grammar.ts:824](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L824) +[grammar.ts:824](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L824) ___ @@ -124,7 +124,7 @@ ___ #### Defined in -[index.ts:166](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L166) +[index.ts:191](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L191) ___ @@ -138,7 +138,7 @@ ___ #### Defined in -[index.ts:182](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L182) +[index.ts:211](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L211) ___ @@ -158,4 +158,4 @@ ___ #### Defined in -[index.ts:162](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L162) +[index.ts:187](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L187) diff --git a/docs/API/classes/LlamaContext.md b/docs/API/classes/LlamaContext.md index 984bc1d..4da652f 100644 --- a/docs/API/classes/LlamaContext.md +++ b/docs/API/classes/LlamaContext.md @@ -21,6 +21,7 @@ - [completion](LlamaContext.md#completion) - [detokenize](LlamaContext.md#detokenize) - 
[embedding](LlamaContext.md#embedding) +- [getFormattedChat](LlamaContext.md#getformattedchat) - [loadSession](LlamaContext.md#loadsession) - [release](LlamaContext.md#release) - [saveSession](LlamaContext.md#savesession) @@ -41,7 +42,7 @@ #### Defined in -[index.ts:62](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L62) +[index.ts:72](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L72) ## Properties @@ -51,7 +52,7 @@ #### Defined in -[index.ts:56](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L56) +[index.ts:64](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L64) ___ @@ -61,7 +62,7 @@ ___ #### Defined in -[index.ts:54](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L54) +[index.ts:62](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L62) ___ @@ -69,9 +70,15 @@ ___ • **model**: `Object` = `{}` +#### Type declaration + +| Name | Type | +| :------ | :------ | +| `isChatTemplateSupported?` | `boolean` | + #### Defined in -[index.ts:60](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L60) +[index.ts:68](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L68) ___ @@ -81,7 +88,7 @@ ___ #### Defined in -[index.ts:58](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L58) +[index.ts:66](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L66) ## Methods @@ -104,7 +111,7 @@ ___ #### Defined in -[index.ts:135](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L135) +[index.ts:162](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L162) ___ @@ -125,7 +132,7 @@ ___ #### Defined in -[index.ts:90](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L90) +[index.ts:109](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L109) ___ @@ -145,7 +152,7 @@ ___ #### Defined in -[index.ts:127](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L127) +[index.ts:154](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L154) ___ @@ -165,7 +172,27 @@ ___ #### Defined in -[index.ts:131](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L131) +[index.ts:158](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L158) + +___ + +### getFormattedChat + +▸ **getFormattedChat**(`messages`): `Promise`<`string`\> + +#### Parameters + +| Name | Type | +| :------ | :------ | +| `messages` | `RNLlamaOAICompatibleMessage`[] | + +#### Returns + +`Promise`<`string`\> + +#### Defined in + +[index.ts:98](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L98) ___ @@ -187,7 +214,7 @@ Load cached prompt & completion state from a file. #### Defined in -[index.ts:77](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L77) +[index.ts:82](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L82) ___ @@ -201,7 +228,7 @@ ___ #### Defined in -[index.ts:157](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L157) +[index.ts:182](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L182) ___ @@ -225,7 +252,7 @@ Save current cached prompt & completion state to a file. 
#### Defined in -[index.ts:86](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L86) +[index.ts:91](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L91) ___ @@ -239,7 +266,7 @@ ___ #### Defined in -[index.ts:119](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L119) +[index.ts:146](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L146) ___ @@ -259,4 +286,4 @@ ___ #### Defined in -[index.ts:123](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L123) +[index.ts:150](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L150) diff --git a/docs/API/classes/SchemaGrammarConverter.md b/docs/API/classes/SchemaGrammarConverter.md index 8b9a535..09cecb4 100644 --- a/docs/API/classes/SchemaGrammarConverter.md +++ b/docs/API/classes/SchemaGrammarConverter.md @@ -46,7 +46,7 @@ #### Defined in -[grammar.ts:211](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L211) +[grammar.ts:211](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L211) ## Properties @@ -56,7 +56,7 @@ #### Defined in -[grammar.ts:201](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L201) +[grammar.ts:201](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L201) ___ @@ -66,7 +66,7 @@ ___ #### Defined in -[grammar.ts:203](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L203) +[grammar.ts:203](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L203) ___ @@ -76,7 +76,7 @@ ___ #### Defined in -[grammar.ts:199](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L199) +[grammar.ts:199](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L199) ___ @@ -90,7 +90,7 @@ ___ #### Defined in -[grammar.ts:207](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L207) +[grammar.ts:207](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L207) ___ @@ -100,7 +100,7 @@ ___ #### Defined in -[grammar.ts:209](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L209) +[grammar.ts:209](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L209) ___ @@ -114,7 +114,7 @@ ___ #### Defined in -[grammar.ts:205](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L205) +[grammar.ts:205](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L205) ## Methods @@ -135,7 +135,7 @@ ___ #### Defined in -[grammar.ts:693](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L693) +[grammar.ts:693](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L693) ___ @@ -156,7 +156,7 @@ ___ #### Defined in -[grammar.ts:224](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L224) +[grammar.ts:224](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L224) ___ @@ -179,7 +179,7 @@ ___ #### Defined in -[grammar.ts:710](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L710) +[grammar.ts:710](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L710) ___ @@ -200,7 +200,7 @@ ___ #### Defined in -[grammar.ts:312](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L312) +[grammar.ts:312](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L312) ___ @@ -220,7 +220,7 @@ ___ #### Defined in -[grammar.ts:518](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L518) +[grammar.ts:518](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L518) ___ @@ -241,7 +241,7 @@ ___ #### Defined in 
-[grammar.ts:323](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L323) +[grammar.ts:323](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L323) ___ @@ -255,7 +255,7 @@ ___ #### Defined in -[grammar.ts:813](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L813) +[grammar.ts:813](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L813) ___ @@ -276,7 +276,7 @@ ___ #### Defined in -[grammar.ts:247](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L247) +[grammar.ts:247](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L247) ___ @@ -297,4 +297,4 @@ ___ #### Defined in -[grammar.ts:529](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L529) +[grammar.ts:529](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L529)
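In practice the converter above is usually driven through the `convertJsonSchemaToGrammar` helper (documented in the API README) rather than instantiated directly. A minimal sketch, assuming the synchronous usage shown in the project README; the schema and `propOrder` values are illustrative:

```ts
import { convertJsonSchemaToGrammar } from 'llama.rn'

// Illustrative schema: one tool call with a single required string argument.
const schema = {
  type: 'object',
  properties: {
    function: { const: 'image_search' },
    arguments: {
      type: 'object',
      properties: {
        query: { type: 'string', description: 'The search query' },
      },
      required: ['query'],
    },
  },
}

// propOrder keeps "function" ahead of "arguments" in the generated GBNF,
// mirroring the README example.
const grammar = convertJsonSchemaToGrammar({
  schema,
  propOrder: { function: 0, arguments: 1 },
})

// The resulting grammar can then be passed as the `grammar` parameter shown
// in the README's grammar sampling section.
console.log(grammar)
```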