-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This image is useful for benchmarking. It will be used for more automated testing and benchmarking in the future. PiperOrigin-RevId: 696993365
- Loading branch information
1 parent
9e0e42b
commit 0fbf649
Showing
1 changed file
with
34 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# syntax=docker/dockerfile:1
# Benchmarking image: bakes model weights, a pinned vLLM checkout, and the
# ShareGPT benchmark dataset into a TPU PyTorch/XLA runtime image.
#
# NOTE(review): passing HF_TOKEN via --build-arg leaks it into
# `docker history`. Prefer a BuildKit secret mount
# (RUN --mount=type=secret,id=hf_token ...) when the build pipeline allows it.
ARG HF_USERNAME=""
ARG HF_TOKEN=""
# Hugging Face model repo to bake into the image.
ARG MODEL="Qwen/Qwen2.5-1.5B-Instruct"
# Pin vLLM to an exact commit for reproducible benchmarks.
ARG VLLM_COMMIT_HASH="1dbae0329c6d907b72b373667b4d5716bae4415f"

# ---------------------------------------------------------------------------
# Stage 1: fetch model weights, vLLM source, and the benchmark dataset.
# apt/git tooling installed here never reaches the final image.
# ---------------------------------------------------------------------------
FROM ubuntu:22.04 AS downloader
# Re-declare the args: ARGs declared before FROM are not visible in a stage.
ARG HF_USERNAME
ARG HF_TOKEN
ARG MODEL
ARG VLLM_COMMIT_HASH

# --no-install-recommends keeps the layer small; ca-certificates must then be
# listed explicitly (it is only a Recommends of git) so HTTPS clones work.
# Clean the apt lists in the same layer so the cache is not baked in.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        ffmpeg \
        git \
        git-lfs \
        libgl1 \
        libsm6 \
        libxext6 \
    && rm -rf /var/lib/apt/lists/*

RUN git lfs install
# Clone anonymously when no token is supplied; otherwise authenticate.
RUN if [ -z "$HF_TOKEN" ]; then \
        git clone "https://huggingface.co/$MODEL" /model; \
    else \
        git clone "https://$HF_USERNAME:[email protected]/$MODEL" /model; \
    fi

# Check out the pinned commit; `git -C` avoids a `cd` inside RUN (DL3003).
RUN git clone https://github.com/vllm-project/vllm.git /vllm \
    && git -C /vllm checkout "$VLLM_COMMIT_HASH"
# GIT_CLONE_PROTECTION_ACTIVE=false: this dataset repo trips git's clone
# protection hook; disable it for this single fetch only.
RUN GIT_CLONE_PROTECTION_ACTIVE=false git clone \
        https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered /dataset

# ---------------------------------------------------------------------------
# Stage 2: TPU runtime image (PyTorch/XLA nightly, Python 3.10).
# ---------------------------------------------------------------------------
FROM us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:nightly_3.10_tpuvm_20241112

COPY --from=downloader /model /model
COPY --from=downloader /vllm /vllm
COPY --from=downloader /dataset/ShareGPT_V3_unfiltered_cleaned_split.json /ShareGPT_V3_unfiltered_cleaned_split.json

WORKDIR /vllm

# Selects vLLM's TPU backend in setup.py; also read by vLLM at runtime,
# so keeping it in ENV (rather than a build-only ARG) is intentional.
ENV VLLM_TARGET_DEVICE="tpu"
# --no-cache-dir: do not bake pip's download cache into the image (DL3042).
RUN python3 -m pip install --no-cache-dir -r requirements-tpu.txt
RUN python3 setup.py develop

# Serve the baked-in model through the OpenAI-compatible API server.
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/model", "--chat-template", "/vllm/examples/template_chatml.jinja", "--tensor-parallel-size=4", "--max-model-len=512", "--enforce-eager"]