From c495df854291c9c9110c8432904528194b671fbd Mon Sep 17 00:00:00 2001 From: jhen Date: Sun, 28 Jul 2024 13:30:38 +0800 Subject: [PATCH 1/6] feat: add isChatTemplateSupported in model info --- android/src/main/jni.cpp | 11 +++++++ cpp/rn-llama.hpp | 8 +++++ example/ios/.xcode.env.local | 2 +- example/ios/Podfile.lock | 4 +-- example/package.json | 1 + example/src/App.tsx | 63 ++++++++++++++++++++++++------------ example/yarn.lock | 2 +- ios/RNLlama.mm | 7 +--- ios/RNLlamaContext.h | 9 +----- ios/RNLlamaContext.mm | 52 ++++++++++++----------------- 10 files changed, 89 insertions(+), 70 deletions(-) diff --git a/android/src/main/jni.cpp b/android/src/main/jni.cpp index 6a9b7e9..5278817 100644 --- a/android/src/main/jni.cpp +++ b/android/src/main/jni.cpp @@ -62,6 +62,16 @@ static inline void putDouble(JNIEnv *env, jobject map, const char *key, double v env->CallVoidMethod(map, putDoubleMethod, jKey, value); } +// Method to put boolean into WritableMap +static inline void putBoolean(JNIEnv *env, jobject map, const char *key, bool value) { + jclass mapClass = env->FindClass("com/facebook/react/bridge/WritableMap"); + jmethodID putBooleanMethod = env->GetMethodID(mapClass, "putBoolean", "(Ljava/lang/String;Z)V"); + + jstring jKey = env->NewStringUTF(key); + + env->CallVoidMethod(map, putBooleanMethod, jKey, value); +} + // Method to put WriteableMap into WritableMap static inline void putMap(JNIEnv *env, jobject map, const char *key, jobject value) { jclass mapClass = env->FindClass("com/facebook/react/bridge/WritableMap"); @@ -208,6 +218,7 @@ Java_com_rnllama_LlamaContext_loadModelDetails( putString(env, result, "desc", desc); putDouble(env, result, "size", llama_model_size(llama->model)); putDouble(env, result, "nParams", llama_model_n_params(llama->model)); + putBoolean(env, result, "isChatTemplateSupported", llama->validateModelChatTemplate()); putMap(env, result, "metadata", meta); return reinterpret_cast(result); diff --git a/cpp/rn-llama.hpp b/cpp/rn-llama.hpp index 69c3bdc..1d3bed2 100644 --- a/cpp/rn-llama.hpp +++ b/cpp/rn-llama.hpp @@ -229,6 +229,14 @@ struct llama_rn_context return true; } + bool validateModelChatTemplate() const { + llama_chat_message chat[] = {{"user", "test"}}; + + const int res = llama_chat_apply_template(model, nullptr, chat, 1, true, nullptr, 0); + + return res > 0; + } + void truncatePrompt(std::vector &prompt_tokens) { const int n_left = n_ctx - params.n_keep; const int n_block_size = n_left / 2; diff --git a/example/ios/.xcode.env.local b/example/ios/.xcode.env.local index 92bcef1..51de392 100644 --- a/example/ios/.xcode.env.local +++ b/example/ios/.xcode.env.local @@ -1 +1 @@ -export NODE_BINARY=/var/folders/4z/1d45cfts3936kdm7v9jl349r0000gn/T/yarn--1722061680584-0.19771203690487615/node +export NODE_BINARY=/var/folders/4z/1d45cfts3936kdm7v9jl349r0000gn/T/yarn--1722073570606-0.6759511337227031/node diff --git a/example/ios/Podfile.lock b/example/ios/Podfile.lock index e5e2c51..5f26ad0 100644 --- a/example/ios/Podfile.lock +++ b/example/ios/Podfile.lock @@ -8,7 +8,7 @@ PODS: - hermes-engine/Pre-built (= 0.72.3) - hermes-engine/Pre-built (0.72.3) - libevent (2.1.12) - - llama-rn (0.3.4): + - llama-rn (0.3.5): - RCT-Folly - RCTRequired - RCTTypeSafety @@ -1261,7 +1261,7 @@ SPEC CHECKSUMS: glog: 04b94705f318337d7ead9e6d17c019bd9b1f6b1b hermes-engine: 10fbd3f62405c41ea07e71973ea61e1878d07322 libevent: 4049cae6c81cdb3654a443be001fb9bdceff7913 - llama-rn: 1facf2ce116e23e89a526e30439f151eb03f460d + llama-rn: 1ab4e3bae3136c83dcc2bdcea1ddf0c861335d78 RCT-Folly: 
424b8c9a7a0b9ab2886ffe9c3b041ef628fd4fb1 RCTRequired: a2faf4bad4e438ca37b2040cb8f7799baa065c18 RCTTypeSafety: cb09f3e4747b6d18331a15eb05271de7441ca0b3 diff --git a/example/package.json b/example/package.json index 731d770..5b69c31 100644 --- a/example/package.json +++ b/example/package.json @@ -12,6 +12,7 @@ "dependencies": { "@flyerhq/react-native-chat-ui": "^1.4.3", "@react-native-clipboard/clipboard": "^1.13.1", + "json5": "^2.2.3", "react": "18.2.0", "react-native": "0.72.3", "react-native-blob-util": "^0.19.1", diff --git a/example/src/App.tsx b/example/src/App.tsx index 75eef22..fe5bb3d 100644 --- a/example/src/App.tsx +++ b/example/src/App.tsx @@ -6,6 +6,7 @@ import DocumentPicker from 'react-native-document-picker' import type { DocumentPickerResponse } from 'react-native-document-picker' import { Chat, darkTheme } from '@flyerhq/react-native-chat-ui' import type { MessageType } from '@flyerhq/react-native-chat-ui' +import json5 from 'json5' import ReactNativeBlobUtil from 'react-native-blob-util' // eslint-disable-next-line import/no-unresolved import { initLlama, LlamaContext, convertJsonSchemaToGrammar } from 'llama.rn' @@ -73,7 +74,7 @@ export default function App() { } } - const addSystemMessage = (text: string, metadata = {} ) => { + const addSystemMessage = (text: string, metadata = {}) => { const textMessage: MessageType.Text = { author: system, createdAt: Date.now(), @@ -119,7 +120,7 @@ export default function App() { '- /release: release the context\n' + '- /stop: stop the current completion\n' + '- /reset: reset the conversation', - '- /save-session: save the session tokens\n' + + '- /save-session: save the session tokens\n' + '- /load-session: load the session tokens', ) }) @@ -166,12 +167,18 @@ export default function App() { const handleSendPress = async (message: MessageType.PartialText) => { if (context) { switch (message.text) { + case '/info': + addSystemMessage( + `// Model Info\n${json5.stringify(context.model, null, 2)}`, + { copyable: true }, + ) + return case '/bench': addSystemMessage('Heating up the model...') const t0 = Date.now() await context.bench(8, 4, 1, 1) const tHeat = Date.now() - t0 - if (tHeat > 1E4) { + if (tHeat > 1e4) { addSystemMessage('Heat up time is too long, please try again.') return } @@ -186,15 +193,21 @@ export default function App() { ppStd, tgAvg, tgStd, - } = await context.bench(512, 128, 1, 3) + } = await context.bench(512, 128, 1, 3) - const size = `${(modelSize / 1024.0 / 1024.0 / 1024.0).toFixed(2)} GiB` + const size = `${(modelSize / 1024.0 / 1024.0 / 1024.0).toFixed( + 2, + )} GiB` const nParams = `${(modelNParams / 1e9).toFixed(2)}B` const md = '| model | size | params | test | t/s |\n' + '| --- | --- | --- | --- | --- |\n' + - `| ${modelDesc} | ${size} | ${nParams} | pp 512 | ${ppAvg.toFixed(2)} ± ${ppStd.toFixed(2)} |\n` + - `| ${modelDesc} | ${size} | ${nParams} | tg 128 | ${tgAvg.toFixed(2)} ± ${tgStd.toFixed(2)}` + `| ${modelDesc} | ${size} | ${nParams} | pp 512 | ${ppAvg.toFixed( + 2, + )} ± ${ppStd.toFixed(2)} |\n` + + `| ${modelDesc} | ${size} | ${nParams} | tg 128 | ${tgAvg.toFixed( + 2, + )} ± ${tgStd.toFixed(2)}` addSystemMessage(md, { copyable: true }) return case '/release': @@ -208,22 +221,30 @@ export default function App() { addSystemMessage('Conversation reset!') return case '/save-session': - context.saveSession(`${dirs.DocumentDir}/llama-session.bin`).then(tokensSaved => { - console.log('Session tokens saved:', tokensSaved) - addSystemMessage(`Session saved! 
${tokensSaved} tokens saved.`) - }).catch(e => { - console.log('Session save failed:', e) - addSystemMessage(`Session save failed: ${e.message}`) - }) + context + .saveSession(`${dirs.DocumentDir}/llama-session.bin`) + .then((tokensSaved) => { + console.log('Session tokens saved:', tokensSaved) + addSystemMessage(`Session saved! ${tokensSaved} tokens saved.`) + }) + .catch((e) => { + console.log('Session save failed:', e) + addSystemMessage(`Session save failed: ${e.message}`) + }) return case '/load-session': - context.loadSession(`${dirs.DocumentDir}/llama-session.bin`).then(details => { - console.log('Session loaded:', details) - addSystemMessage(`Session loaded! ${details.tokens_loaded} tokens loaded.`) - }).catch(e => { - console.log('Session load failed:', e) - addSystemMessage(`Session load failed: ${e.message}`) - }) + context + .loadSession(`${dirs.DocumentDir}/llama-session.bin`) + .then((details) => { + console.log('Session loaded:', details) + addSystemMessage( + `Session loaded! ${details.tokens_loaded} tokens loaded.`, + ) + }) + .catch((e) => { + console.log('Session load failed:', e) + addSystemMessage(`Session load failed: ${e.message}`) + }) return } } diff --git a/example/yarn.lock b/example/yarn.lock index 204708e..0ddcea9 100644 --- a/example/yarn.lock +++ b/example/yarn.lock @@ -3523,7 +3523,7 @@ json-stable-stringify@^1.0.2: dependencies: jsonify "^0.0.1" -json5@^2.1.1, json5@^2.2.2: +json5@^2.1.1, json5@^2.2.2, json5@^2.2.3: version "2.2.3" resolved "https://registry.yarnpkg.com/json5/-/json5-2.2.3.tgz#78cd6f1a19bdc12b73db5ad0c61efd66c1e29283" integrity sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg== diff --git a/ios/RNLlama.mm b/ios/RNLlama.mm index d96441e..b8ce456 100644 --- a/ios/RNLlama.mm +++ b/ios/RNLlama.mm @@ -53,12 +53,7 @@ @implementation RNLlama @"contextId": contextIdNumber, @"gpu": @([context isMetalEnabled]), @"reasonNoGPU": [context reasonNoMetal], - @"model": @{ - @"desc": [context modelDesc], - @"size": @([context modelSize]), - @"nParams": @([context modelNParams]), - @"metadata": [context metadata], - } + @"model": [context modelInfo], }); } diff --git a/ios/RNLlamaContext.h b/ios/RNLlamaContext.h index 7e07c96..b772345 100644 --- a/ios/RNLlamaContext.h +++ b/ios/RNLlamaContext.h @@ -8,10 +8,6 @@ bool is_metal_enabled; NSString * reason_no_metal; bool is_model_loaded; - NSString * model_desc; - uint64_t model_size; - uint64_t model_n_params; - NSDictionary * metadata; rnllama::llama_rn_context * llama; } @@ -19,10 +15,7 @@ + (instancetype)initWithParams:(NSDictionary *)params; - (bool)isMetalEnabled; - (NSString *)reasonNoMetal; -- (NSDictionary *)metadata; -- (NSString *)modelDesc; -- (uint64_t)modelSize; -- (uint64_t)modelNParams; +- (NSDictionary *)modelInfo; - (bool)isModelLoaded; - (bool)isPredicting; - (NSDictionary *)completion:(NSDictionary *)params onToken:(void (^)(NSMutableDictionary *tokenResult))onToken; diff --git a/ios/RNLlamaContext.mm b/ios/RNLlamaContext.mm index 87d6f3c..d5f2492 100644 --- a/ios/RNLlamaContext.mm +++ b/ios/RNLlamaContext.mm @@ -82,26 +82,6 @@ + (instancetype)initWithParams:(NSDictionary *)params { context->is_metal_enabled = isMetalEnabled; context->reason_no_metal = reasonNoMetal; - int count = llama_model_meta_count(context->llama->model); - NSDictionary *meta = [[NSMutableDictionary alloc] init]; - for (int i = 0; i < count; i++) { - char key[256]; - llama_model_meta_key_by_index(context->llama->model, i, key, sizeof(key)); - char val[256]; - 
llama_model_meta_val_str_by_index(context->llama->model, i, val, sizeof(val)); - - NSString *keyStr = [NSString stringWithUTF8String:key]; - NSString *valStr = [NSString stringWithUTF8String:val]; - [meta setValue:valStr forKey:keyStr]; - } - context->metadata = meta; - - char desc[1024]; - llama_model_desc(context->llama->model, desc, sizeof(desc)); - context->model_desc = [NSString stringWithUTF8String:desc]; - context->model_size = llama_model_size(context->llama->model); - context->model_n_params = llama_model_n_params(context->llama->model); - return context; } @@ -113,20 +93,30 @@ - (NSString *)reasonNoMetal { return reason_no_metal; } -- (NSDictionary *)metadata { - return metadata; -} +- (NSDictionary *)modelInfo { + char desc[1024]; + llama_model_desc(llama->model, desc, sizeof(desc)); -- (NSString *)modelDesc { - return model_desc; -} + int count = llama_model_meta_count(llama->model); + NSDictionary *meta = [[NSMutableDictionary alloc] init]; + for (int i = 0; i < count; i++) { + char key[256]; + llama_model_meta_key_by_index(llama->model, i, key, sizeof(key)); + char val[256]; + llama_model_meta_val_str_by_index(llama->model, i, val, sizeof(val)); -- (uint64_t)modelSize { - return model_size; -} + NSString *keyStr = [NSString stringWithUTF8String:key]; + NSString *valStr = [NSString stringWithUTF8String:val]; + [meta setValue:valStr forKey:keyStr]; + } -- (uint64_t)modelNParams { - return model_n_params; + return @{ + @"desc": [NSString stringWithUTF8String:desc], + @"size": @(llama_model_size(llama->model)), + @"nParams": @(llama_model_n_params(llama->model)), + @"isChatTemplateSupported": @(llama->validateModelChatTemplate()), + @"metadata": meta + }; } - (bool)isModelLoaded { From a75662cf84dd1f9073263d0d0708c14c7753adca Mon Sep 17 00:00:00 2001 From: jhen Date: Sun, 28 Jul 2024 13:50:39 +0800 Subject: [PATCH 2/6] feat(ts): add formatChat util --- src/__tests__/chat.test.ts | 61 ++++++++++++++++++++++++++++++++++++++ src/chat.ts | 47 +++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 src/__tests__/chat.test.ts create mode 100644 src/chat.ts diff --git a/src/__tests__/chat.test.ts b/src/__tests__/chat.test.ts new file mode 100644 index 0000000..9e78c0a --- /dev/null +++ b/src/__tests__/chat.test.ts @@ -0,0 +1,61 @@ +import { formatChat } from '../chat' + +describe('formatChat', () => { + it('should format chat messages', () => { + const messages = [ + { + role: 'user', + content: 'Hello, world!', + }, + { + role: 'bot', + content: [ + { + text: 'Hello, user!', + }, + { + text: 'How are you?', + }, + ], + }, + ] + + const expected = [ + { + role: 'user', + content: 'Hello, world!', + }, + { + role: 'bot', + content: 'Hello, user!\nHow are you?', + }, + ] + + expect(formatChat(messages)).toEqual(expected) + }) + + it('should throw an error if the content is missing', () => { + const messages = [ + { + role: 'user', + }, + ] + + expect(() => formatChat(messages)).toThrowError( + "Missing 'content' (ref: https://github.com/ggerganov/llama.cpp/issues/8367)", + ) + }) + + it('should throw an error if the content type is invalid', () => { + const messages = [ + { + role: 'user', + content: 42, + }, + ] + + expect(() => formatChat(messages)).toThrowError( + "Invalid 'content' type (ref: https://github.com/ggerganov/llama.cpp/issues/8367)", + ) + }) +}) diff --git a/src/chat.ts b/src/chat.ts new file mode 100644 index 0000000..9f18d00 --- /dev/null +++ b/src/chat.ts @@ -0,0 +1,47 @@ +export type RNLlamaMessagePart = { + text?: string +} + +export type 
RNLlamaOAICompatibleMessage = { + role: string + content?: string | RNLlamaMessagePart[] | any // any for check invalid content type +} + +export type RNLlamaChatMessage = { + role: string + content: string +} + +export function formatChat( + messages: RNLlamaOAICompatibleMessage[], +): RNLlamaChatMessage[] { + const chat: RNLlamaChatMessage[] = [] + + messages.forEach((currMsg) => { + const role: string = currMsg.role || '' + + let content: string = '' + if ('content' in currMsg) { + if (typeof currMsg.content === 'string') { + ;({ content } = currMsg) + } else if (Array.isArray(currMsg.content)) { + currMsg.content.forEach((part) => { + if ('text' in part) { + content += `${content ? '\n' : ''}${part.text}` + } + }) + } else { + throw new TypeError( + "Invalid 'content' type (ref: https://github.com/ggerganov/llama.cpp/issues/8367)", + ) + } + } else { + throw new Error( + "Missing 'content' (ref: https://github.com/ggerganov/llama.cpp/issues/8367)", + ) + } + + chat.push({ role, content }) + }) + return chat +} From 07c4ef7072daa61fb6eeb790450ceaa471a03fd9 Mon Sep 17 00:00:00 2001 From: jhen Date: Sun, 28 Jul 2024 14:36:46 +0800 Subject: [PATCH 3/6] feat(ts): add getFormattedChat native method --- .../main/java/com/rnllama/LlamaContext.java | 13 +++ .../src/main/java/com/rnllama/RNLlama.java | 32 ++++++++ android/src/main/jni.cpp | 40 ++++++++++ .../java/com/rnllama/RNLlamaModule.java | 5 ++ .../java/com/rnllama/RNLlamaModule.java | 5 ++ ios/RNLlama.mm | 14 ++++ ios/RNLlamaContext.h | 1 + ios/RNLlamaContext.mm | 14 ++++ src/NativeRNLlama.ts | 62 ++++++++++----- src/chat.ts | 11 +-- src/index.ts | 79 ++++++++++++------- 11 files changed, 221 insertions(+), 55 deletions(-) diff --git a/android/src/main/java/com/rnllama/LlamaContext.java b/android/src/main/java/com/rnllama/LlamaContext.java index 157419a..55d90f3 100644 --- a/android/src/main/java/com/rnllama/LlamaContext.java +++ b/android/src/main/java/com/rnllama/LlamaContext.java @@ -75,6 +75,14 @@ public WritableMap getModelDetails() { return modelDetails; } + public String getFormattedChat(ReadableArray messages, String chatTemplate) { + ReadableMap[] msgs = new ReadableMap[messages.size()]; + for (int i = 0; i < messages.size(); i++) { + msgs[i] = messages.getMap(i); + } + return getFormattedChat(this.context, msgs, chatTemplate == null ? "" : chatTemplate); + } + private void emitPartialCompletion(WritableMap tokenResult) { WritableMap event = Arguments.createMap(); event.putInt("contextId", LlamaContext.this.id); @@ -316,6 +324,11 @@ protected static native long initContext( protected static native WritableMap loadModelDetails( long contextPtr ); + protected static native String getFormattedChat( + long contextPtr, + ReadableMap[] messages, + String chatTemplate + ); protected static native WritableMap loadSession( long contextPtr, String path diff --git a/android/src/main/java/com/rnllama/RNLlama.java b/android/src/main/java/com/rnllama/RNLlama.java index 430eae7..ac96eb2 100644 --- a/android/src/main/java/com/rnllama/RNLlama.java +++ b/android/src/main/java/com/rnllama/RNLlama.java @@ -80,6 +80,38 @@ protected void onPostExecute(WritableMap result) { tasks.put(task, "initContext"); } + public void getFormattedChat(double id, final ReadableArray messages, final String chatTemplate, Promise promise) { + final int contextId = (int) id; + AsyncTask task = new AsyncTask() { + private Exception exception; + + @Override + protected String doInBackground(Void... 
voids) { + try { + LlamaContext context = contexts.get(contextId); + if (context == null) { + throw new Exception("Context not found"); + } + return context.getFormattedChat(messages, chatTemplate); + } catch (Exception e) { + exception = e; + return null; + } + } + + @Override + protected void onPostExecute(String result) { + if (exception != null) { + promise.reject(exception); + return; + } + promise.resolve(result); + tasks.remove(this); + } + }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR); + tasks.put(task, "getFormattedChat-" + contextId); + } + public void loadSession(double id, final String path, Promise promise) { final int contextId = (int) id; AsyncTask task = new AsyncTask() { diff --git a/android/src/main/jni.cpp b/android/src/main/jni.cpp index 5278817..7078c9b 100644 --- a/android/src/main/jni.cpp +++ b/android/src/main/jni.cpp @@ -224,6 +224,46 @@ Java_com_rnllama_LlamaContext_loadModelDetails( return reinterpret_cast(result); } +JNIEXPORT jobject JNICALL +Java_com_rnllama_LlamaContext_getFormattedChat( + JNIEnv *env, + jobject thiz, + jlong context_ptr, + jobjectArray messages, + jstring chat_template +) { + UNUSED(thiz); + auto llama = context_map[(long) context_ptr]; + + std::vector chat; + + int messages_len = env->GetArrayLength(messages); + for (int i = 0; i < messages_len; i++) { + jobject msg = env->GetObjectArrayElement(messages, i); + jclass msgClass = env->GetObjectClass(msg); + + jmethodID getRoleMethod = env->GetMethodID(msgClass, "getString", "(Ljava/lang/String;)Ljava/lang/String;"); + jstring roleKey = env->NewStringUTF("role"); + jstring contentKey = env->NewStringUTF("content"); + + jstring role_str = (jstring) env->CallObjectMethod(msg, getRoleMethod, roleKey); + jstring content_str = (jstring) env->CallObjectMethod(msg, getRoleMethod, contentKey); + + const char *role = env->GetStringUTFChars(role_str, nullptr); + const char *content = env->GetStringUTFChars(content_str, nullptr); + + chat.push_back({ role, content }); + + env->ReleaseStringUTFChars(role_str, role); + env->ReleaseStringUTFChars(content_str, content); + } + + const char *tmpl_chars = env->GetStringUTFChars(chat_template, nullptr); + std::string formatted_chat = llama_chat_apply_template(llama->model, tmpl_chars, chat, true); + + return env->NewStringUTF(formatted_chat.c_str()); +} + JNIEXPORT jobject JNICALL Java_com_rnllama_LlamaContext_loadSession( JNIEnv *env, diff --git a/android/src/newarch/java/com/rnllama/RNLlamaModule.java b/android/src/newarch/java/com/rnllama/RNLlamaModule.java index 93d2722..7527c0f 100644 --- a/android/src/newarch/java/com/rnllama/RNLlamaModule.java +++ b/android/src/newarch/java/com/rnllama/RNLlamaModule.java @@ -42,6 +42,11 @@ public void initContext(final ReadableMap params, final Promise promise) { rnllama.initContext(params, promise); } + @ReactMethod + public void getFormattedChat(double id, ReadableArray messages, String chatTemplate, Promise promise) { + rnllama.getFormattedChat(id, messages, chatTemplate, promise); + } + @ReactMethod public void loadSession(double id, String path, Promise promise) { rnllama.loadSession(id, path, promise); diff --git a/android/src/oldarch/java/com/rnllama/RNLlamaModule.java b/android/src/oldarch/java/com/rnllama/RNLlamaModule.java index 814fb17..4e6cc6f 100644 --- a/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +++ b/android/src/oldarch/java/com/rnllama/RNLlamaModule.java @@ -43,6 +43,11 @@ public void initContext(final ReadableMap params, final Promise promise) { rnllama.initContext(params, promise); } 
+ @ReactMethod + public void getFormattedChat(double id, ReadableArray messages, String chatTemplate, Promise promise) { + rnllama.getFormattedChat(id, messages, chatTemplate, promise); + } + @ReactMethod public void loadSession(double id, String path, Promise promise) { rnllama.loadSession(id, path, promise); diff --git a/ios/RNLlama.mm b/ios/RNLlama.mm index b8ce456..89b37c0 100644 --- a/ios/RNLlama.mm +++ b/ios/RNLlama.mm @@ -57,6 +57,20 @@ @implementation RNLlama }); } +RCT_EXPORT_METHOD(getFormattedChat:(double)contextId + withMessages:(NSArray *)messages + withTemplate:(NSString *)chatTemplate + withResolver:(RCTPromiseResolveBlock)resolve + withRejecter:(RCTPromiseRejectBlock)reject) +{ + RNLlamaContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]]; + if (context == nil) { + reject(@"llama_error", @"Context not found", nil); + return; + } + resolve([context getFormattedChat:messages withTemplate:chatTemplate]); +} + RCT_EXPORT_METHOD(loadSession:(double)contextId withFilePath:(NSString *)filePath withResolver:(RCTPromiseResolveBlock)resolve diff --git a/ios/RNLlamaContext.h b/ios/RNLlamaContext.h index b772345..37a34bb 100644 --- a/ios/RNLlamaContext.h +++ b/ios/RNLlamaContext.h @@ -23,6 +23,7 @@ - (NSArray *)tokenize:(NSString *)text; - (NSString *)detokenize:(NSArray *)tokens; - (NSArray *)embedding:(NSString *)text; +- (NSString *)getFormattedChat:(NSArray *)messages withTemplate:(NSString *)chatTemplate; - (NSDictionary *)loadSession:(NSString *)path; - (int)saveSession:(NSString *)path size:(int)size; - (NSString *)bench:(int)pp tg:(int)tg pl:(int)pl nr:(int)nr; diff --git a/ios/RNLlamaContext.mm b/ios/RNLlamaContext.mm index d5f2492..d478e3c 100644 --- a/ios/RNLlamaContext.mm +++ b/ios/RNLlamaContext.mm @@ -127,6 +127,20 @@ - (bool)isPredicting { return llama->is_predicting; } +- (NSString *)getFormattedChat:(NSArray *)messages withTemplate:(NSString *)chatTemplate { + std::vector chat; + + for (NSDictionary *msg in messages) { + std::string role = [[msg objectForKey:@"role"] UTF8String]; + std::string content = [[msg objectForKey:@"content"] UTF8String]; + chat.push_back({ role, content }); + } + + auto tmpl = chatTemplate == nil ? 
"" : [chatTemplate UTF8String]; + auto formatted_chat = llama_chat_apply_template(llama->model, tmpl, chat, true); + return [NSString stringWithUTF8String:formatted_chat.c_str()]; +} + - (NSArray *)tokenProbsToDict:(std::vector)probs { NSMutableArray *out = [[NSMutableArray alloc] init]; for (const auto &prob : probs) diff --git a/src/NativeRNLlama.ts b/src/NativeRNLlama.ts index 284de67..f2d6882 100644 --- a/src/NativeRNLlama.ts +++ b/src/NativeRNLlama.ts @@ -1,5 +1,5 @@ -import type { TurboModule } from 'react-native'; -import { TurboModuleRegistry } from 'react-native'; +import type { TurboModule } from 'react-native' +import { TurboModuleRegistry } from 'react-native' export type NativeContextParams = { model: string @@ -110,22 +110,48 @@ export type NativeSessionLoadResult = { prompt: string } -export interface Spec extends TurboModule { - setContextLimit(limit: number): Promise; - initContext(params: NativeContextParams): Promise; - - loadSession(contextId: number, filepath: string): Promise; - saveSession(contextId: number, filepath: string, size: number): Promise; - completion(contextId: number, params: NativeCompletionParams): Promise; - stopCompletion(contextId: number): Promise; - tokenize(contextId: number, text: string): Promise; - detokenize(contextId: number, tokens: number[]): Promise; - embedding(contextId: number, text: string): Promise; - bench(contextId: number, pp: number, tg: number, pl: number, nr: number): Promise; - - releaseContext(contextId: number): Promise; +export type NativeLlamaChatMessage = { + role: string + content: string +} - releaseAllContexts(): Promise; +export interface Spec extends TurboModule { + setContextLimit(limit: number): Promise + initContext(params: NativeContextParams): Promise + + getFormattedChat( + contextId: number, + messages: NativeLlamaChatMessage[], + chatTemplate?: string, + ): Promise + loadSession( + contextId: number, + filepath: string, + ): Promise + saveSession( + contextId: number, + filepath: string, + size: number, + ): Promise + completion( + contextId: number, + params: NativeCompletionParams, + ): Promise + stopCompletion(contextId: number): Promise + tokenize(contextId: number, text: string): Promise + detokenize(contextId: number, tokens: number[]): Promise + embedding(contextId: number, text: string): Promise + bench( + contextId: number, + pp: number, + tg: number, + pl: number, + nr: number, + ): Promise + + releaseContext(contextId: number): Promise + + releaseAllContexts(): Promise } -export default TurboModuleRegistry.get('RNLlama') as Spec; +export default TurboModuleRegistry.get('RNLlama') as Spec diff --git a/src/chat.ts b/src/chat.ts index 9f18d00..a88a374 100644 --- a/src/chat.ts +++ b/src/chat.ts @@ -1,3 +1,5 @@ +import type { NativeLlamaChatMessage } from './NativeRNLlama' + export type RNLlamaMessagePart = { text?: string } @@ -7,15 +9,10 @@ export type RNLlamaOAICompatibleMessage = { content?: string | RNLlamaMessagePart[] | any // any for check invalid content type } -export type RNLlamaChatMessage = { - role: string - content: string -} - export function formatChat( messages: RNLlamaOAICompatibleMessage[], -): RNLlamaChatMessage[] { - const chat: RNLlamaChatMessage[] = [] +): NativeLlamaChatMessage[] { + const chat: NativeLlamaChatMessage[] = [] messages.forEach((currMsg) => { const role: string = currMsg.role || '' diff --git a/src/index.ts b/src/index.ts index eabdebb..b6f616d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -12,6 +12,8 @@ import type { NativeSessionLoadResult, } from 
'./NativeRNLlama' import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar' +import type { RNLlamaOAICompatibleMessage } from './chat' +import { formatChat } from './chat' export { SchemaGrammarConverter, convertJsonSchemaToGrammar } @@ -38,7 +40,10 @@ type TokenNativeEvent = { export type ContextParams = NativeContextParams -export type CompletionParams = Omit +export type CompletionParams = Omit< + NativeCompletionParams, + 'emit_partial_completion' +> export type BenchResult = { modelDesc: string @@ -57,14 +62,11 @@ export class LlamaContext { reasonNoGPU: string = '' - model: Object = {} + model: { + isChatTemplateSupported?: boolean + } = {} - constructor({ - contextId, - gpu, - reasonNoGPU, - model, - }: NativeLlamaContext) { + constructor({ contextId, gpu, reasonNoGPU, model }: NativeLlamaContext) { this.id = contextId this.gpu = gpu this.reasonNoGPU = reasonNoGPU @@ -83,22 +85,37 @@ export class LlamaContext { /** * Save current cached prompt & completion state to a file. */ - async saveSession(filepath: string, options?: { tokenSize: number }): Promise { + async saveSession( + filepath: string, + options?: { tokenSize: number }, + ): Promise { return RNLlama.saveSession(this.id, filepath, options?.tokenSize || -1) } + async getFormattedChat( + messages: RNLlamaOAICompatibleMessage[], + ): Promise { + const chat = formatChat(messages) + return RNLlama.getFormattedChat( + this.id, + chat, + this.model?.isChatTemplateSupported ? undefined : 'chatml', + ) + } + + // async chatCompletion() {} // TODO + async completion( params: CompletionParams, callback?: (data: TokenData) => void, ): Promise { - let tokenListener: any = callback && EventEmitter.addListener( - EVENT_ON_TOKEN, - (evt: TokenNativeEvent) => { + let tokenListener: any = + callback && + EventEmitter.addListener(EVENT_ON_TOKEN, (evt: TokenNativeEvent) => { const { contextId, tokenResult } = evt if (contextId !== this.id) return callback(tokenResult) - }, - ) + }) const promise = RNLlama.completion(this.id, { ...params, emit_partial_completion: !!callback, @@ -132,17 +149,15 @@ export class LlamaContext { return RNLlama.embedding(this.id, text) } - async bench(pp: number, tg: number, pl: number, nr: number): Promise { + async bench( + pp: number, + tg: number, + pl: number, + nr: number, + ): Promise { const result = await RNLlama.bench(this.id, pp, tg, pl, nr) - const [ - modelDesc, - modelSize, - modelNParams, - ppAvg, - ppStd, - tgAvg, - tgStd, - ] = JSON.parse(result) + const [modelDesc, modelSize, modelNParams, ppAvg, ppStd, tgAvg, tgStd] = + JSON.parse(result) return { modelDesc, modelSize, @@ -170,12 +185,16 @@ export async function initLlama({ }: ContextParams): Promise { let path = model if (path.startsWith('file://')) path = path.slice(7) - const { contextId, gpu, reasonNoGPU, model: modelDetails } = - await RNLlama.initContext({ - model: path, - is_model_asset: !!isModelAsset, - ...rest, - }) + const { + contextId, + gpu, + reasonNoGPU, + model: modelDetails, + } = await RNLlama.initContext({ + model: path, + is_model_asset: !!isModelAsset, + ...rest, + }) return new LlamaContext({ contextId, gpu, reasonNoGPU, model: modelDetails }) } From 944f508dc116250cdf48ed3f976f49c299740595 Mon Sep 17 00:00:00 2001 From: jhen Date: Sun, 28 Jul 2024 14:57:58 +0800 Subject: [PATCH 4/6] feat(ts): completion: add messages --- src/index.ts | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/index.ts b/src/index.ts index b6f616d..c0552a7 100644 --- a/src/index.ts +++ 
b/src/index.ts @@ -43,7 +43,9 @@ export type ContextParams = NativeContextParams export type CompletionParams = Omit< NativeCompletionParams, 'emit_partial_completion' -> +> & { + messages?: RNLlamaOAICompatibleMessage[] +} export type BenchResult = { modelDesc: string @@ -103,12 +105,17 @@ export class LlamaContext { ) } - // async chatCompletion() {} // TODO - async completion( params: CompletionParams, callback?: (data: TokenData) => void, ): Promise { + + let finalPrompt = params.prompt + if (params.messages) { // messages always win + finalPrompt = await this.getFormattedChat(params.messages) + console.log(finalPrompt) + } + let tokenListener: any = callback && EventEmitter.addListener(EVENT_ON_TOKEN, (evt: TokenNativeEvent) => { @@ -116,8 +123,10 @@ export class LlamaContext { if (contextId !== this.id) return callback(tokenResult) }) + const promise = RNLlama.completion(this.id, { ...params, + prompt: finalPrompt, emit_partial_completion: !!callback, }) return promise From ad7e0a58004eb9020765d6e4b9ca44016378b337 Mon Sep 17 00:00:00 2001 From: jhen Date: Sun, 28 Jul 2024 14:58:17 +0800 Subject: [PATCH 5/6] feat(example): use messages --- example/src/App.tsx | 89 +++++++++++++++++++++++++-------------------- src/index.ts | 5 ++- 2 files changed, 53 insertions(+), 41 deletions(-) diff --git a/example/src/App.tsx b/example/src/App.tsx index fe5bb3d..8be06c5 100644 --- a/example/src/App.tsx +++ b/example/src/App.tsx @@ -21,30 +21,10 @@ const user = { id: 'y9d7f8pgn' } const systemId = 'h3o3lc5xj' const system = { id: systemId } -const initialChatPrompt = - 'This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.\n\n' - -const generateChatPrompt = ( - context: LlamaContext | undefined, - conversationId: string, - messages: MessageType.Any[], -) => { - const prompt = [...messages] - .reverse() - .map((msg) => { - if ( - !msg.metadata?.system && - msg.metadata?.conversationId === conversationId && - msg.metadata?.contextId === context?.id && - msg.type === 'text' - ) { - return `${msg.author.id === systemId ? 'llama' : 'User'}: ${msg.text}` - } - return '' - }) - .filter(Boolean) - .join('\n') - return initialChatPrompt + prompt +const systemMessage = { + role: 'system', + content: + 'This is a conversation between user and assistant, a friendly chatbot.\n\n', } const defaultConversationId = 'default' @@ -259,32 +239,50 @@ export default function App() { conversationId: conversationIdRef.current, }, } - addMessage(textMessage) - setInferencing(true) const id = randId() const createdAt = Date.now() - let prompt = generateChatPrompt(context, conversationIdRef.current, [ - textMessage, - ...messages, - ]) - prompt += `\nllama:` + const msgs = [ + systemMessage, + ...[...messages] + .reverse() + .map((msg) => { + if ( + !msg.metadata?.system && + msg.metadata?.conversationId === conversationIdRef.current && + msg.metadata?.contextId === context?.id && + msg.type === 'text' + ) { + return { + role: msg.author.id === systemId ? 
'assistant' : 'user', + content: msg.text, + } + } + return { role: '', content: '' } + }) + .filter((msg) => msg.role), + { role: 'user', content: message.text }, + ] + addMessage(textMessage) + setInferencing(true) + // Test area { // Test tokenize + const formattedChat = (await context?.getFormattedChat(msgs)) || '' const t0 = Date.now() - const { tokens } = (await context?.tokenize(prompt)) || {} + const { tokens } = (await context?.tokenize(formattedChat)) || {} const t1 = Date.now() console.log( - 'Prompt:', - prompt, + 'Formatted:', + `"${formattedChat}"`, '\nTokenize:', tokens, `(${tokens?.length} tokens, ${t1 - t0}ms})`, ) // Test embedding - // await context?.embedding(prompt).then((result) => { + // await context?.embedding(formattedChat).then((result) => { // console.log('Embedding:', result) // }) @@ -342,7 +340,7 @@ export default function App() { context ?.completion( { - prompt, + messages: msgs, n_predict: 400, temperature: 0.7, top_k: 40, // <= 0 to use vocab size @@ -357,9 +355,19 @@ export default function App() { mirostat_tau: 5, // target entropy mirostat_eta: 0.1, // learning rate penalize_nl: false, // penalize newlines - seed: 1234, // random seed + seed: -1, // random seed n_probs: 0, // Show probabilities - stop: ['', 'llama:', 'User:'], + stop: [ + '', + '<|end|>', + '<|eot_id|>', + '<|end_of_text|>', + '<|im_end|>', + '<|EOT|>', + '<|END_OF_TURN_TOKEN|>', + '<|end_of_turn|>', + '<|endoftext|>', + ], grammar, // n_threads: 4, // logit_bias: [[15043,1.0]], @@ -386,7 +394,10 @@ export default function App() { id, text: token, type: 'text', - metadata: { contextId: context?.id }, + metadata: { + contextId: context?.id, + conversationId: conversationIdRef.current, + }, }, ...msgs, ] diff --git a/src/index.ts b/src/index.ts index c0552a7..151dd18 100644 --- a/src/index.ts +++ b/src/index.ts @@ -42,8 +42,9 @@ export type ContextParams = NativeContextParams export type CompletionParams = Omit< NativeCompletionParams, - 'emit_partial_completion' + 'emit_partial_completion' | 'prompt' > & { + prompt?: string messages?: RNLlamaOAICompatibleMessage[] } @@ -113,7 +114,6 @@ export class LlamaContext { let finalPrompt = params.prompt if (params.messages) { // messages always win finalPrompt = await this.getFormattedChat(params.messages) - console.log(finalPrompt) } let tokenListener: any = @@ -124,6 +124,7 @@ export class LlamaContext { callback(tokenResult) }) + if (!finalPrompt) throw new Error('Prompt is required') const promise = RNLlama.completion(this.id, { ...params, prompt: finalPrompt, From 23d8f0390e17d6f815bba077618eb139b9b1130d Mon Sep 17 00:00:00 2001 From: jhen Date: Sun, 28 Jul 2024 16:06:36 +0800 Subject: [PATCH 6/6] feat(docs): update --- README.md | 114 ++++++++++++++------- docs/API/README.md | 18 ++-- docs/API/classes/LlamaContext.md | 55 +++++++--- docs/API/classes/SchemaGrammarConverter.md | 32 +++--- 4 files changed, 141 insertions(+), 78 deletions(-) diff --git a/README.md b/README.md index ccf3017..175c2fc 100644 --- a/README.md +++ b/README.md @@ -34,10 +34,12 @@ You can search HuggingFace for available models (Keyword: [`GGUF`](https://huggi For create a GGUF model manually, for example in Llama 2: Download the Llama 2 model + 1. Request access from [here](https://ai.meta.com/llama) 2. 
Download the model from HuggingFace [here](https://huggingface.co/meta-llama/Llama-2-7b-chat) (`Llama-2-7b-chat`) Convert the model to ggml format + ```bash # Start with submodule in this repo (or you can clone the repo https://github.com/ggerganov/llama.cpp.git) yarn && yarn bootstrap @@ -76,26 +78,53 @@ const context = await initLlama({ // embedding: true, // use embedding }) -// Do completion -const { text, timings } = await context.completion( +const stopWords = ['', '<|end|>', '<|eot_id|>', '<|end_of_text|>', '<|im_end|>', '<|EOT|>', '<|END_OF_TURN_TOKEN|>', '<|end_of_turn|>', '<|endoftext|>'] + +// Do chat completion +const msgResult = await context.completion( + { + messages: [ + { + role: 'system', + content: 'This is a conversation between user and assistant, a friendly chatbot.', + }, + { + role: 'user', + content: 'Hello!', + }, + ], + n_predict: 100, + stop: stopWords, + // ...other params + }, + (data) => { + // This is a partial completion callback + const { token } = data + }, +) +console.log('Result:', msgResult.text) +console.log('Timings:', msgResult.timings) + +// Or do text completion +const textResult = await context.completion( { prompt: 'This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.\n\nUser: Hello!\nLlama:', n_predict: 100, - stop: ['', 'Llama:', 'User:'], - // n_threads: 4, + stop: [...stopWords, 'Llama:', 'User:'], + // ...other params }, (data) => { // This is a partial completion callback const { token } = data }, ) -console.log('Result:', text) -console.log('Timings:', timings) +console.log('Result:', textResult.text) +console.log('Timings:', textResult.timings) ``` The binding’s deisgn inspired by [server.cpp](https://github.com/ggerganov/llama.cpp/tree/master/examples/server) example in llama.cpp, so you can map its API to LlamaContext: -- `/completion`: `context.completion(params, partialCompletionCallback)` +- `/completion` and `/chat/completions`: `context.completion(params, partialCompletionCallback)` - `/tokenize`: `context.tokenize(content)` - `/detokenize`: `context.detokenize(tokens)` - `/embedding`: `context.embedding(content)` @@ -110,6 +139,7 @@ Please visit the [Documentation](docs/API) for more details. You can also visit the [example](example) to see how to use it. Run the example: + ```bash yarn && yarn bootstrap @@ -142,7 +172,9 @@ You can see [GBNF Guide](https://github.com/ggerganov/llama.cpp/tree/master/gram ```js import { initLlama, convertJsonSchemaToGrammar } from 'llama.rn' -const schema = { /* JSON Schema, see below */ } +const schema = { + /* JSON Schema, see below */ +} const context = await initLlama({ model: 'file://', @@ -153,7 +185,7 @@ const context = await initLlama({ grammar: convertJsonSchemaToGrammar({ schema, propOrder: { function: 0, arguments: 1 }, - }) + }), }) const { text } = await context.completion({ @@ -171,80 +203,81 @@ console.log('Result:', text) { oneOf: [ { - type: "object", - name: "get_current_weather", - description: "Get the current weather in a given location", + type: 'object', + name: 'get_current_weather', + description: 'Get the current weather in a given location', properties: { function: { - const: "get_current_weather", + const: 'get_current_weather', }, arguments: { - type: "object", + type: 'object', properties: { location: { - type: "string", - description: "The city and state, e.g. San Francisco, CA", + type: 'string', + description: 'The city and state, e.g. 
San Francisco, CA', }, unit: { - type: "string", - enum: ["celsius", "fahrenheit"], + type: 'string', + enum: ['celsius', 'fahrenheit'], }, }, - required: ["location"], + required: ['location'], }, }, }, { - type: "object", - name: "create_event", - description: "Create a calendar event", + type: 'object', + name: 'create_event', + description: 'Create a calendar event', properties: { function: { - const: "create_event", + const: 'create_event', }, arguments: { - type: "object", + type: 'object', properties: { title: { - type: "string", - description: "The title of the event", + type: 'string', + description: 'The title of the event', }, date: { - type: "string", - description: "The date of the event", + type: 'string', + description: 'The date of the event', }, time: { - type: "string", - description: "The time of the event", + type: 'string', + description: 'The time of the event', }, }, - required: ["title", "date", "time"], + required: ['title', 'date', 'time'], }, }, }, { - type: "object", - name: "image_search", - description: "Search for an image", + type: 'object', + name: 'image_search', + description: 'Search for an image', properties: { function: { - const: "image_search", + const: 'image_search', }, arguments: { - type: "object", + type: 'object', properties: { query: { - type: "string", - description: "The search query", + type: 'string', + description: 'The search query', }, }, - required: ["query"], + required: ['query'], }, }, }, ], } ``` +
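A minimal sketch of turning a grammar-constrained completion back into a typed function call on the app side. The `parseFunctionCall` helper and the `create_event` handling are illustrative only (not part of the library); `json5` is used because the example app already depends on it, but plain `JSON.parse` also works when the grammar emits strict JSON:

```ts
import json5 from 'json5'

type FunctionCall = {
  function: string
  arguments: Record<string, any>
}

// Illustrative helper: parse the completion text produced under the grammar
// above into a { function, arguments } object, returning null on failure.
const parseFunctionCall = (text: string): FunctionCall | null => {
  try {
    return json5.parse(text) as FunctionCall
  } catch (e) {
    console.warn('Completion did not parse as a function call:', e)
    return null
  }
}

// Usage, assuming `text` comes from context.completion() as shown above:
// const call = parseFunctionCall(text)
// if (call?.function === 'create_event') { /* handle the event arguments */ }
```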
@@ -268,6 +301,7 @@ string ::= "\"" ( 2 ::= "{" space "\"function\"" space ":" space 2-function "," space "\"arguments\"" space ":" space 2-arguments "}" space root ::= 0 | 1 | 2 ``` +
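When `messages` are passed to `completion`, they are first formatted through `getFormattedChat`, which applies the model's own chat template and falls back to the generic `chatml` template whenever the model reports `isChatTemplateSupported: false`. A minimal sketch of calling that path directly, reusing the `context` and `stopWords` values from the usage example above (the message contents are illustrative):

```ts
const messages = [
  { role: 'system', content: 'You are a helpful chatbot.' },
  { role: 'user', content: 'Hello!' },
]

// Render the messages with the model's chat template (or the 'chatml' fallback).
const prompt = await context.getFormattedChat(messages)
console.log('Formatted prompt:', prompt)

// Equivalent to calling completion({ messages, ... }) directly.
const { text } = await context.completion({
  prompt,
  n_predict: 100,
  stop: stopWords,
})
console.log('Reply:', text)
```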
## Mock `llama.rn` @@ -281,12 +315,14 @@ jest.mock('llama.rn', () => require('llama.rn/jest/mock')) ## NOTE iOS: + - The [Extended Virtual Addressing](https://developer.apple.com/documentation/bundleresources/entitlements/com_apple_developer_kernel_extended-virtual-addressing) capability is recommended to enable on iOS project. - Metal: - We have tested to know some devices is not able to use Metal ('params.n_gpu_layers > 0') due to llama.cpp used SIMD-scoped operation, you can check if your device is supported in [Metal feature set tables](https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf), Apple7 GPU will be the minimum requirement. - It's also not supported in iOS simulator due to [this limitation](https://developer.apple.com/documentation/metal/developing_metal_apps_that_run_in_simulator#3241609), we used constant buffers more than 14. Android: + - Currently only supported arm64-v8a / x86_64 platform, this means you can't initialize a context on another platforms. The 64-bit platform are recommended because it can allocate more memory for the model. - No integrated any GPU backend yet. diff --git a/docs/API/README.md b/docs/API/README.md index b998afd..cf9c944 100644 --- a/docs/API/README.md +++ b/docs/API/README.md @@ -43,17 +43,17 @@ llama.rn #### Defined in -[index.ts:43](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L43) +[index.ts:51](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L51) ___ ### CompletionParams -Ƭ **CompletionParams**: `Omit`<`NativeCompletionParams`, ``"emit_partial_completion"``\> +Ƭ **CompletionParams**: `Omit`<`NativeCompletionParams`, ``"emit_partial_completion"`` \| ``"prompt"``\> & { `messages?`: `RNLlamaOAICompatibleMessage`[] ; `prompt?`: `string` } #### Defined in -[index.ts:41](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L41) +[index.ts:43](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L43) ___ @@ -63,7 +63,7 @@ ___ #### Defined in -[index.ts:39](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L39) +[index.ts:41](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L41) ___ @@ -80,7 +80,7 @@ ___ #### Defined in -[index.ts:29](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L29) +[index.ts:31](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L31) ## Functions @@ -104,7 +104,7 @@ ___ #### Defined in -[grammar.ts:824](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L824) +[grammar.ts:824](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L824) ___ @@ -124,7 +124,7 @@ ___ #### Defined in -[index.ts:166](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L166) +[index.ts:191](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L191) ___ @@ -138,7 +138,7 @@ ___ #### Defined in -[index.ts:182](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L182) +[index.ts:211](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L211) ___ @@ -158,4 +158,4 @@ ___ #### Defined in -[index.ts:162](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L162) +[index.ts:187](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L187) diff --git a/docs/API/classes/LlamaContext.md b/docs/API/classes/LlamaContext.md index 984bc1d..4da652f 100644 --- a/docs/API/classes/LlamaContext.md +++ b/docs/API/classes/LlamaContext.md @@ -21,6 +21,7 @@ - [completion](LlamaContext.md#completion) - [detokenize](LlamaContext.md#detokenize) - 
[embedding](LlamaContext.md#embedding) +- [getFormattedChat](LlamaContext.md#getformattedchat) - [loadSession](LlamaContext.md#loadsession) - [release](LlamaContext.md#release) - [saveSession](LlamaContext.md#savesession) @@ -41,7 +42,7 @@ #### Defined in -[index.ts:62](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L62) +[index.ts:72](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L72) ## Properties @@ -51,7 +52,7 @@ #### Defined in -[index.ts:56](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L56) +[index.ts:64](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L64) ___ @@ -61,7 +62,7 @@ ___ #### Defined in -[index.ts:54](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L54) +[index.ts:62](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L62) ___ @@ -69,9 +70,15 @@ ___ • **model**: `Object` = `{}` +#### Type declaration + +| Name | Type | +| :------ | :------ | +| `isChatTemplateSupported?` | `boolean` | + #### Defined in -[index.ts:60](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L60) +[index.ts:68](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L68) ___ @@ -81,7 +88,7 @@ ___ #### Defined in -[index.ts:58](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L58) +[index.ts:66](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L66) ## Methods @@ -104,7 +111,7 @@ ___ #### Defined in -[index.ts:135](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L135) +[index.ts:162](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L162) ___ @@ -125,7 +132,7 @@ ___ #### Defined in -[index.ts:90](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L90) +[index.ts:109](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L109) ___ @@ -145,7 +152,7 @@ ___ #### Defined in -[index.ts:127](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L127) +[index.ts:154](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L154) ___ @@ -165,7 +172,27 @@ ___ #### Defined in -[index.ts:131](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L131) +[index.ts:158](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L158) + +___ + +### getFormattedChat + +▸ **getFormattedChat**(`messages`): `Promise`<`string`\> + +#### Parameters + +| Name | Type | +| :------ | :------ | +| `messages` | `RNLlamaOAICompatibleMessage`[] | + +#### Returns + +`Promise`<`string`\> + +#### Defined in + +[index.ts:98](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L98) ___ @@ -187,7 +214,7 @@ Load cached prompt & completion state from a file. #### Defined in -[index.ts:77](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L77) +[index.ts:82](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L82) ___ @@ -201,7 +228,7 @@ ___ #### Defined in -[index.ts:157](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L157) +[index.ts:182](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L182) ___ @@ -225,7 +252,7 @@ Save current cached prompt & completion state to a file. 
#### Defined in -[index.ts:86](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L86) +[index.ts:91](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L91) ___ @@ -239,7 +266,7 @@ ___ #### Defined in -[index.ts:119](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L119) +[index.ts:146](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L146) ___ @@ -259,4 +286,4 @@ ___ #### Defined in -[index.ts:123](https://github.com/mybigday/llama.rn/blob/f95f600/src/index.ts#L123) +[index.ts:150](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/index.ts#L150) diff --git a/docs/API/classes/SchemaGrammarConverter.md b/docs/API/classes/SchemaGrammarConverter.md index 8b9a535..09cecb4 100644 --- a/docs/API/classes/SchemaGrammarConverter.md +++ b/docs/API/classes/SchemaGrammarConverter.md @@ -46,7 +46,7 @@ #### Defined in -[grammar.ts:211](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L211) +[grammar.ts:211](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L211) ## Properties @@ -56,7 +56,7 @@ #### Defined in -[grammar.ts:201](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L201) +[grammar.ts:201](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L201) ___ @@ -66,7 +66,7 @@ ___ #### Defined in -[grammar.ts:203](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L203) +[grammar.ts:203](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L203) ___ @@ -76,7 +76,7 @@ ___ #### Defined in -[grammar.ts:199](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L199) +[grammar.ts:199](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L199) ___ @@ -90,7 +90,7 @@ ___ #### Defined in -[grammar.ts:207](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L207) +[grammar.ts:207](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L207) ___ @@ -100,7 +100,7 @@ ___ #### Defined in -[grammar.ts:209](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L209) +[grammar.ts:209](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L209) ___ @@ -114,7 +114,7 @@ ___ #### Defined in -[grammar.ts:205](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L205) +[grammar.ts:205](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L205) ## Methods @@ -135,7 +135,7 @@ ___ #### Defined in -[grammar.ts:693](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L693) +[grammar.ts:693](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L693) ___ @@ -156,7 +156,7 @@ ___ #### Defined in -[grammar.ts:224](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L224) +[grammar.ts:224](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L224) ___ @@ -179,7 +179,7 @@ ___ #### Defined in -[grammar.ts:710](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L710) +[grammar.ts:710](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L710) ___ @@ -200,7 +200,7 @@ ___ #### Defined in -[grammar.ts:312](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L312) +[grammar.ts:312](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L312) ___ @@ -220,7 +220,7 @@ ___ #### Defined in -[grammar.ts:518](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L518) +[grammar.ts:518](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L518) ___ @@ -241,7 +241,7 @@ ___ #### Defined in 
-[grammar.ts:323](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L323) +[grammar.ts:323](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L323) ___ @@ -255,7 +255,7 @@ ___ #### Defined in -[grammar.ts:813](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L813) +[grammar.ts:813](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L813) ___ @@ -276,7 +276,7 @@ ___ #### Defined in -[grammar.ts:247](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L247) +[grammar.ts:247](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L247) ___ @@ -297,4 +297,4 @@ ___ #### Defined in -[grammar.ts:529](https://github.com/mybigday/llama.rn/blob/f95f600/src/grammar.ts#L529) +[grammar.ts:529](https://github.com/mybigday/llama.rn/blob/ad7e0a5/src/grammar.ts#L529)
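In practice the converter above is usually driven through the `convertJsonSchemaToGrammar` helper (documented in the API README) rather than instantiated directly. A minimal sketch, assuming the synchronous usage shown in the project README; the schema and `propOrder` values are illustrative:

```ts
import { convertJsonSchemaToGrammar } from 'llama.rn'

// Illustrative schema: one tool call with a single required string argument.
const schema = {
  type: 'object',
  properties: {
    function: { const: 'image_search' },
    arguments: {
      type: 'object',
      properties: {
        query: { type: 'string', description: 'The search query' },
      },
      required: ['query'],
    },
  },
}

// propOrder keeps "function" ahead of "arguments" in the generated GBNF,
// mirroring the README example.
const grammar = convertJsonSchemaToGrammar({
  schema,
  propOrder: { function: 0, arguments: 1 },
})

// The resulting grammar can then be passed as the `grammar` parameter shown
// in the README's grammar sampling section.
console.log(grammar)
```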