From 12ae811ccd214225c3f43724143e406f1896a01c Mon Sep 17 00:00:00 2001
From: Fabian Wolf <fabian@fawolf.de>
Date: Mon, 11 Nov 2024 10:30:48 +0100
Subject: [PATCH] Adjust llama.cpp server call: set -ub to 512, add -t 16; ignore test_* files

---
 .gitignore                      | 1 +
 webapp/llm_processing/routes.py | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index e408333..e8e7e1b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,5 @@ logs*
 __pycache__
 
 test_*.py
+test_*
 output.pdf
\ No newline at end of file
diff --git a/webapp/llm_processing/routes.py b/webapp/llm_processing/routes.py
index 1be133e..b15f960 100644
--- a/webapp/llm_processing/routes.py
+++ b/webapp/llm_processing/routes.py
@@ -227,7 +227,9 @@ def extract_from_report(
                 "-b",
                 "2048",
                 "-ub",
-                "2048",
+                "512",
+                "-t",
+                "16",
             ] + (["--verbose"] if verbose_llama else []) + (["--mlock"] if mlock else []) +
             (["-ctk", kv_cache_type, "-ctv", kv_cache_type] if kv_cache_type != "" else []) + 
             (["-sm", "none", "-mg", str(gpu)] if gpu not in ["all", "ALL", "mps", ""] else [])+