forked from GoogleCloudPlatform/generative-ai
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
19c7e6a
commit 8328249
Showing
2 changed files
with
1,153 additions
and
1,118 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
|
||
# Image that serves a single Ollama model over HTTP on port 8080.
# The model weights are downloaded at build time so that containers start
# without network access to the model registry.

# Ollama release to build on. The default "latest" is what an untagged FROM
# resolves to; pass --build-arg OLLAMA_VERSION=<tag> for reproducible builds.
ARG OLLAMA_VERSION=latest
FROM ollama/ollama:${OLLAMA_VERSION}

# OLLAMA_HOST       - host and port to listen on
# OLLAMA_MODELS     - directory to store model weight files
# OLLAMA_DEBUG      - "false" reduces the verbosity of the logs
# OLLAMA_KEEP_ALIVE - "-1" means do not unload model weights from the GPU
# MODEL             - model to load (used by the RUN and CMD steps below);
#                     Ollama defaults to 4-bit quantized weights
ENV OLLAMA_HOST=0.0.0.0:8080 \
    OLLAMA_MODELS=/models \
    OLLAMA_DEBUG=false \
    OLLAMA_KEEP_ALIVE=-1 \
    MODEL=gemma2:9b

# Documentation only: the port configured in OLLAMA_HOST above.
EXPOSE 8080

# Download the model weights into the image. The pull goes through the server,
# so start it in the background and poll until it answers instead of relying
# on a fixed sleep. Fail the build if the server is not up after ~60 attempts.
RUN ollama serve & \
    attempts=0; \
    until ollama list >/dev/null 2>&1; do \
        attempts=$((attempts + 1)); \
        if [ "$attempts" -ge 60 ]; then \
            echo "ollama server did not become ready" >&2; \
            exit 1; \
        fi; \
        sleep 1; \
    done; \
    ollama pull "$MODEL"

# At startup time we start the server and, once it accepts requests, run a
# dummy request so the model is loaded into GPU memory before real traffic.
# The shell waits on the server process only, so the container exits with the
# server's exit status if the server stops.
ENTRYPOINT ["/bin/sh"]
CMD ["-c", "ollama serve & server=$!; (until ollama list >/dev/null 2>&1; do sleep 1; done; ollama run \"$MODEL\" 'Say one word') & wait \"$server\""]
Oops, something went wrong.