Skip to content

Commit

Permalink
Create vllm TPU serving image.
Browse files Browse the repository at this point in the history
This image is useful for benchmarking. It will be used for more automated
testing and benchmarking in the future.

PiperOrigin-RevId: 696993365
  • Loading branch information
manninglucas authored and gvisor-bot committed Nov 22, 2024
1 parent 9e0e42b commit 0fbf649
Showing 1 changed file with 34 additions and 0 deletions.
34 changes: 34 additions & 0 deletions images/tpu/vllm/Dockerfile.x86_64
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Build-time parameters. They are declared ahead of the first FROM, so they are
# global: a stage only sees them after re-declaring them with a bare
# `ARG <name>`, which then picks up the default given here (or the value passed
# with --build-arg).
#
# HF_USERNAME / HF_TOKEN: optional Hugging Face credentials. When HF_TOKEN is
#   empty the model is cloned anonymously.
# MODEL: repository path appended to https://huggingface.co/ for the model
#   clone.
# VLLM_COMMIT_HASH: vLLM revision that is checked out after cloning.
ARG HF_USERNAME=""
ARG HF_TOKEN=""
ARG MODEL="Qwen/Qwen2.5-1.5B-Instruct"
ARG VLLM_COMMIT_HASH="1dbae0329c6d907b72b373667b4d5716bae4415f"

# Stage "downloader": fetches the model weights, the vLLM sources and the
# benchmark dataset with git. Nothing from this stage reaches the final image
# except the paths that a later stage explicitly copies out of it.
FROM ubuntu:22.04 AS downloader
# Re-declare the global build args so their values are visible in this stage.
ARG HF_USERNAME
ARG HF_TOKEN
ARG MODEL
ARG VLLM_COMMIT_HASH

# ca-certificates is listed explicitly: with --no-install-recommends it is no
# longer pulled in implicitly, and git needs a CA bundle for the HTTPS clones.
# NOTE(review): ffmpeg, libgl1, libsm6 and libxext6 do not appear to be used by
# any command in this stage -- confirm and drop them if so.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    ffmpeg \
    git \
    git-lfs \
    libgl1 \
    libsm6 \
    libxext6 \
    && rm -rf /var/lib/apt/lists/*

RUN git lfs install

# Clone the model anonymously, or with HTTP basic auth when HF_TOKEN is set.
# /model/.git is removed in the same step because (a) an authenticated clone
# records the remote URL, credentials included, in .git/config, and (b) git-lfs
# keeps a second copy of every large file under .git/lfs. Only the checked-out
# working tree is needed to serve the model.
# NOTE(review): HF_TOKEN is still passed as a build arg, so it can show up in
# the build history/cache of this stage; a BuildKit secret mount would avoid
# that but would change how the image is built.
RUN if [ -z "$HF_TOKEN" ]; then \
        git clone "https://huggingface.co/$MODEL" /model; \
    else \
        git clone "https://$HF_USERNAME:$HF_TOKEN@huggingface.co/$MODEL" /model; \
    fi \
    && rm -rf /model/.git

# Fetch vLLM and pin it to the requested commit. The .git directory is kept.
# NOTE(review): the vLLM build may derive its version from git metadata --
# confirm before stripping /vllm/.git.
RUN git clone https://github.com/vllm-project/vllm.git /vllm && git -C /vllm checkout "$VLLM_COMMIT_HASH"

# Benchmark dataset. GIT_CLONE_PROTECTION_ACTIVE=false is presumably required
# so that git's clone protection does not reject the git-lfs hooks while
# cloning -- verify against the git version shipped in the base image.
RUN GIT_CLONE_PROTECTION_ACTIVE=false git clone https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered /dataset

# Final stage: the serving image. Starts from a dated PyTorch/XLA TPU nightly
# and adds the artifacts fetched by the "downloader" stage.
FROM us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:nightly_3.10_tpuvm_20241112

# Model weights, vLLM sources, and the single dataset file used for
# benchmarking (the rest of the dataset repository is left behind).
COPY --from=downloader /model /model
COPY --from=downloader /vllm /vllm
COPY --from=downloader /dataset/ShareGPT_V3_unfiltered_cleaned_split.json /ShareGPT_V3_unfiltered_cleaned_split.json

WORKDIR /vllm

# Set before the install steps below; as an ENV it also stays set in the
# running container.
ENV VLLM_TARGET_DEVICE="tpu"
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN python3 -m pip install --no-cache-dir -r requirements-tpu.txt
# Development-mode install of the sources under /vllm.
RUN python3 setup.py develop

# Default command: start the OpenAI-compatible API server for the model in
# /model. Exec (JSON) form, so the server runs as PID 1 and receives signals
# directly.
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", \
     "--model", "/model", \
     "--chat-template", "/vllm/examples/template_chatml.jinja", \
     "--tensor-parallel-size=4", \
     "--max-model-len=512", \
     "--enforce-eager"]

0 comments on commit 0fbf649

Please sign in to comment.