diff --git a/container-images/cuda/Containerfile b/container-images/cuda/Containerfile
index 6b7e8017..3952285a 100644
--- a/container-images/cuda/Containerfile
+++ b/container-images/cuda/Containerfile
@@ -1,7 +1,7 @@
 # Base image with CUDA for compilation
 FROM docker.io/nvidia/cuda:12.6.2-devel-ubi9 AS builder
 
-ARG LLAMA_CPP_SHA=1329c0a75e6a7defc5c380eaf80d8e0f66d7da78
+ARG LLAMA_CPP_SHA=af148c9386da825a60c7038549c121c35ca56b50
 # renovate: datasource=git-refs depName=ggerganov/whisper.cpp packageName=https://github.com/ggerganov/whisper.cpp gitRef=master versioning=loose type=digest
 ARG WHISPER_CPP_SHA=f19463ece2d43fd0b605dc513d8800eeb4e2315e
diff --git a/container-images/ramalama/Containerfile b/container-images/ramalama/Containerfile
index ac8dee62..4e02658d 100644
--- a/container-images/ramalama/Containerfile
+++ b/container-images/ramalama/Containerfile
@@ -1,6 +1,6 @@
 FROM registry.access.redhat.com/ubi9/ubi:9.4-1214.1729773476
 
-ARG LLAMA_CPP_SHA=1329c0a75e6a7defc5c380eaf80d8e0f66d7da78
+ARG LLAMA_CPP_SHA=af148c9386da825a60c7038549c121c35ca56b50
 # renovate: datasource=git-refs depName=ggerganov/whisper.cpp packageName=https://github.com/ggerganov/whisper.cpp gitRef=master versioning=loose type=digest
 ARG WHISPER_CPP_SHA=f19463ece2d43fd0b605dc513d8800eeb4e2315e
diff --git a/ramalama/model.py b/ramalama/model.py
index 47d5304c..c8d6dcae 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -267,19 +267,8 @@ def run(self, args):
         if not args.container:
             exec_model_path = model_path
 
-        exec_args = ["llama-cli", "-m", exec_model_path, "--in-prefix", "", "--in-suffix", ""]
-
-        if not args.debug:
-            exec_args += ["--no-display-prompt"]
-
-        exec_args += [
-            "-p",
-            prompt,
-        ] + self.common_params
-
-        if not args.ARGS and sys.stdin.isatty():
-            exec_args.append("-cnv")
-
+        exec_args = ["llama-simple-chat", "-m", exec_model_path]
+        exec_args += self.common_params
         if args.gpu:
             exec_args.extend(self.gpu_args())