diff --git a/container-images/cuda/Containerfile b/container-images/cuda/Containerfile
index 6b7e8017..3952285a 100644
--- a/container-images/cuda/Containerfile
+++ b/container-images/cuda/Containerfile
@@ -1,7 +1,7 @@
 # Base image with CUDA for compilation
 FROM docker.io/nvidia/cuda:12.6.2-devel-ubi9 AS builder
 
-ARG LLAMA_CPP_SHA=1329c0a75e6a7defc5c380eaf80d8e0f66d7da78
+ARG LLAMA_CPP_SHA=af148c9386da825a60c7038549c121c35ca56b50
 # renovate: datasource=git-refs depName=ggerganov/whisper.cpp packageName=https://github.com/ggerganov/whisper.cpp gitRef=master versioning=loose type=digest
 ARG WHISPER_CPP_SHA=f19463ece2d43fd0b605dc513d8800eeb4e2315e
diff --git a/container-images/ramalama/Containerfile b/container-images/ramalama/Containerfile
index ac8dee62..4e02658d 100644
--- a/container-images/ramalama/Containerfile
+++ b/container-images/ramalama/Containerfile
@@ -1,6 +1,6 @@
 FROM registry.access.redhat.com/ubi9/ubi:9.4-1214.1729773476
 
-ARG LLAMA_CPP_SHA=1329c0a75e6a7defc5c380eaf80d8e0f66d7da78
+ARG LLAMA_CPP_SHA=af148c9386da825a60c7038549c121c35ca56b50
 # renovate: datasource=git-refs depName=ggerganov/whisper.cpp packageName=https://github.com/ggerganov/whisper.cpp gitRef=master versioning=loose type=digest
 ARG WHISPER_CPP_SHA=f19463ece2d43fd0b605dc513d8800eeb4e2315e
diff --git a/ramalama/model.py b/ramalama/model.py
index 47d5304c..c8d6dcae 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -267,19 +267,8 @@ def run(self, args):
         if not args.container:
             exec_model_path = model_path
 
-        exec_args = ["llama-cli", "-m", exec_model_path, "--in-prefix", "", "--in-suffix", ""]
-
-        if not args.debug:
-            exec_args += ["--no-display-prompt"]
-
-        exec_args += [
-            "-p",
-            prompt,
-        ] + self.common_params
-
-        if not args.ARGS and sys.stdin.isatty():
-            exec_args.append("-cnv")
-
+        exec_args = ["llama-simple-chat", "-m", exec_model_path]
+        exec_args += self.common_params
         if args.gpu:
             exec_args.extend(self.gpu_args())