new reranking and embeddings

mgonzs13 · Jan 10, 2025 · 46f3f05
1 parent 1292f11
commit 46f3f05
Show file tree

Hide file tree

Showing 3 changed files with 19 additions and 4 deletions.
diff --git a/llama_bringup/models/All-MiniLM-L6-v2.yaml b/llama_bringup/models/All-MiniLM-L6-v2.yaml
@@ -0,0 +1,9 @@
+n_ctx: 2048
+n_batch: 1024
+n_gpu_layers: 0
+n_threads: 1
+n_predict: 2048
+embedding: true
+
+model_repo: "second-state/All-MiniLM-L6-v2-Embedding-GGUF"
+model_filename: "all-MiniLM-L6-v2-ggml-model-f16.gguf"
diff --git a/llama_bringup/models/bge-reranker-v2-m3.yaml b/llama_bringup/models/bge-reranker-v2-m3.yaml
@@ -0,0 +1,9 @@
+n_ctx: 2048
+n_batch: 1024
+n_gpu_layers: 0
+n_threads: -1
+n_predict: 2048
+reranking: true
+
+model_repo: "gpustack/bge-reranker-v2-m3-GGUF"
+model_filename: "bge-reranker-v2-m3-Q4_K_M.gguf"
diff --git a/llama_demos/llama_demos/chatllama_tools_demo_node.py b/llama_demos/llama_demos/chatllama_tools_demo_node.py
@@ -66,10 +66,7 @@ def send_prompt(self) -> None:
         ]
 
         self.get_logger().info(f"\nPrompt: {messages[0].content}")
-
-        llm_tools = self.chat.bind_tools(
-            [get_inhabitants, get_curr_temperature], tool_choice="any"
-        )
+        llm_tools = self.chat.bind_tools([get_inhabitants, get_curr_temperature])
 
         self.initial_time = time.time()
         all_tools_res = llm_tools.invoke(messages)