From 12ae811ccd214225c3f43724143e406f1896a01c Mon Sep 17 00:00:00 2001 From: Fabian Wolf Date: Mon, 11 Nov 2024 10:30:48 +0100 Subject: [PATCH] minor adjustment to llama.cpp server call --- .gitignore | 1 + webapp/llm_processing/routes.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index e408333..e8e7e1b 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ logs* __pycache__ test_*.py +test_* output.pdf \ No newline at end of file diff --git a/webapp/llm_processing/routes.py b/webapp/llm_processing/routes.py index 1be133e..b15f960 100644 --- a/webapp/llm_processing/routes.py +++ b/webapp/llm_processing/routes.py @@ -227,7 +227,9 @@ def extract_from_report( "-b", "2048", "-ub", - "2048", + "512", + "-t", + "16", ] + (["--verbose"] if verbose_llama else []) + (["--mlock"] if mlock else []) + (["-ctk", kv_cache_type, "-ctv", kv_cache_type] if kv_cache_type != "" else []) + (["-sm", "none", "-mg", str(gpu)] if gpu not in ["all", "ALL", "mps", ""] else [])+