From 83d31f65a73128af265908399a0d5fc74f421766 Mon Sep 17 00:00:00 2001 From: Branden Butler Date: Sun, 10 Sep 2023 15:17:40 -0500 Subject: [PATCH] Use only physical core count for number of llama inference threads --- src/main/java/org/myrobotlab/service/Llama.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/myrobotlab/service/Llama.java b/src/main/java/org/myrobotlab/service/Llama.java index 7a7259a9a6..364ff1e75a 100644 --- a/src/main/java/org/myrobotlab/service/Llama.java +++ b/src/main/java/org/myrobotlab/service/Llama.java @@ -2,6 +2,7 @@ import de.kherud.llama.LlamaModel; import de.kherud.llama.Parameters; +import org.myrobotlab.framework.Platform; import org.myrobotlab.framework.Service; import org.myrobotlab.logging.Level; import org.myrobotlab.logging.LoggingFactory; @@ -37,7 +38,7 @@ public Llama(String reservedKey, String inId) { public void loadModel(String modelPath) { Parameters params = new Parameters.Builder() .setNGpuLayers(0) - .setNThreads(java.lang.Runtime.getRuntime().availableProcessors()) + .setNThreads(Platform.getLocalInstance().getNumPhysicalProcessors()) .setTemperature(0.7f) .setPenalizeNl(true) .setMirostat(Parameters.MiroStat.V2)