apply suggestions from review
eliasecchig committed Aug 26, 2024
1 parent 19c7e6a commit 8328249
Showing 2 changed files with 1,153 additions and 1,118 deletions.
18 changes: 18 additions & 0 deletions open-models/serving/Dockerfile
@@ -0,0 +1,18 @@

FROM ollama/ollama
# Set the host and port to listen on
ENV OLLAMA_HOST 0.0.0.0:8080
# Set the directory to store model weight files
ENV OLLAMA_MODELS /models
# Reduce the verbosity of the logs
ENV OLLAMA_DEBUG false
# Do not unload model weights from the GPU
ENV OLLAMA_KEEP_ALIVE -1
# Choose the model to load. Ollama defaults to 4-bit quantized weights
ENV MODEL gemma2:9b
# Start the Ollama server in the background at build time and pull the model weights into the image
RUN ollama serve & sleep 5 && ollama pull $MODEL
# At container startup, start the server and issue a dummy request
# so that the model weights are loaded into GPU memory
ENTRYPOINT ["/bin/sh"]
CMD ["-c", "ollama serve & (ollama run $MODEL 'Say one word' &) && wait"]
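
For context, a minimal local smoke test of this image might look like the following sketch. The image tag ollama-gemma and the prompt are illustrative assumptions, and a GPU-enabled Docker runtime is assumed so the model stays resident in GPU memory as configured by OLLAMA_KEEP_ALIVE.

# Build the image (model weights are baked in during the build)
docker build -t ollama-gemma .
# Run it with GPU access, exposing the port set via OLLAMA_HOST
docker run --gpus all -p 8080:8080 ollama-gemma
# In another shell, send a generate request to the Ollama API
curl http://localhost:8080/api/generate -d '{"model": "gemma2:9b", "prompt": "Why is the sky blue?", "stream": false}'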
