Switch to llama-simple-chat

This is a new chat program in llama.cpp which is much simpler than the existing one we were using. It doesn't have the debug/verbose output problem and just seems higher quality in general for a simple chatbot; it's only a few hundred lines of code.

Signed-off-by: Eric Curtin <[email protected]>
containers · Nov 14, 2024 · f573427 · f573427
1 parent 1b5eb20
commit f573427
Show file tree

Hide file tree

Showing 3 changed files with 4 additions and 15 deletions.
diff --git a/container-images/cuda/Containerfile b/container-images/cuda/Containerfile
@@ -1,7 +1,7 @@
 # Base image with CUDA for compilation
 FROM docker.io/nvidia/cuda:12.6.2-devel-ubi9 AS builder
 
-ARG LLAMA_CPP_SHA=1329c0a75e6a7defc5c380eaf80d8e0f66d7da78
+ARG LLAMA_CPP_SHA=af148c9386da825a60c7038549c121c35ca56b50
 # renovate: datasource=git-refs depName=ggerganov/whisper.cpp packageName=https://github.com/ggerganov/whisper.cpp gitRef=master versioning=loose type=digest
 ARG WHISPER_CPP_SHA=f19463ece2d43fd0b605dc513d8800eeb4e2315e
 

diff --git a/container-images/ramalama/Containerfile b/container-images/ramalama/Containerfile
@@ -1,6 +1,6 @@
 FROM registry.access.redhat.com/ubi9/ubi:9.4-1214.1729773476
 
-ARG LLAMA_CPP_SHA=1329c0a75e6a7defc5c380eaf80d8e0f66d7da78
+ARG LLAMA_CPP_SHA=af148c9386da825a60c7038549c121c35ca56b50
 # renovate: datasource=git-refs depName=ggerganov/whisper.cpp packageName=https://github.com/ggerganov/whisper.cpp gitRef=master versioning=loose type=digest
 ARG WHISPER_CPP_SHA=f19463ece2d43fd0b605dc513d8800eeb4e2315e
 

diff --git a/ramalama/model.py b/ramalama/model.py
@@ -267,19 +267,8 @@ def run(self, args):
         if not args.container:
             exec_model_path = model_path
 
-        exec_args = ["llama-cli", "-m", exec_model_path, "--in-prefix", "", "--in-suffix", ""]
-
-        if not args.debug:
-            exec_args += ["--no-display-prompt"]
-
-        exec_args += [
-            "-p",
-            prompt,
-        ] + self.common_params
-
-        if not args.ARGS and sys.stdin.isatty():
-            exec_args.append("-cnv")
-
+        exec_args = ["llama-simple-chat", "-m", exec_model_path]
+        exec_args += self.common_params
         if args.gpu:
             exec_args.extend(self.gpu_args())