# Optimum TPU / Test TGI on TPU (slow tests) / Jetstream Pytorch #56

# Scheduled slow-test workflow for TGI on TPU with the Jetstream Pytorch backend.
name: Optimum TPU / Test TGI on TPU (slow tests) / Jetstream Pytorch

on:
  schedule:
    - cron: '0 3 * * *'  # run at 3 AM UTC
  # This can be used to allow manually triggering nightlies from the web interface
  workflow_dispatch:

# Runs sharing a group cancel each other. The group key is the workflow name
# plus the PR head ref when one exists, otherwise the run id.
# NOTE(review): head_ref is only populated for pull-request events, so with the
# triggers above the key presumably always falls back to the unique run id
# (i.e. nothing is ever cancelled) - confirm this is intended.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
jobs:
  # Single job: build the Jetstream Pytorch TGI server inside the XLA TPU
  # container, then run the slow test suite split into one step per model /
  # configuration so a failure is attributed to a specific subset.
  do-the-job:
    name: Build and Run slow tests
    runs-on:
      group: gcp-ct5lp-hightpu-8t
    container:
      image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:r2.4.0_3.10_tpuvm
      # Extra container flags are injected from the V5_LITEPOD_8_ENV
      # configuration variable (its contents are not visible in this file).
      # /mnt/hf_cache is bind-mounted from the host; HF_HUB_CACHE below points
      # inside that mount.
      options: --shm-size "16gb" --ipc host --privileged ${{ vars.V5_LITEPOD_8_ENV}} -v /mnt/hf_cache:/mnt/hf_cache
    env:
      PJRT_DEVICE: TPU
      HF_TOKEN: ${{ secrets.HF_TOKEN_OPTIMUM_TPU_CI }}
      HF_HUB_CACHE: /mnt/hf_cache/cache_huggingface
      # Quoted: environment values are strings.
      # NOTE(review): presumably selects the Jetstream Pytorch code path - confirm.
      JETSTREAM_PT: "1"
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Builds via make targets, then pip-installs every server wheel found
      # under text-generation-inference/.
      - name: Build and install Jetstream Pytorch TGI
        run: |
          make jetstream_requirements tgi_server test_installs
          find text-generation-inference/ -name "text_generation_server-*whl" -exec python -m pip install {} \;

      # Decode tests: each step narrows test_decode_jetstream.py with a
      # pytest -k keyword expression.
      - name: Run TGI Jetstream Pytorch - Llama
        run: |
          python -m \
            pytest -sv text-generation-inference/tests/test_decode_jetstream.py --runslow -k "slow and Llama"

      - name: Run TGI Jetstream Pytorch - Gemma
        run: |
          python -m \
            pytest -sv text-generation-inference/tests/test_decode_jetstream.py --runslow -k "slow and gemma"

      - name: Run TGI Jetstream Pytorch - Mixtral greedy
        run: |
          python -m \
            pytest -sv text-generation-inference/tests/test_decode_jetstream.py --runslow -k "slow and Mixtral and greedy"

      # Quantized decode tests: same pattern against
      # test_decode_jetstream_quant.py.
      - name: Run TGI Jetstream Pytorch - Quantization Mixtral
        run: |
          python -m \
            pytest -sv text-generation-inference/tests/test_decode_jetstream_quant.py --runslow -k "slow and Mixtral"

      - name: Run TGI Jetstream Pytorch - Quantization Llama-3 8B
        run: |
          python -m \
            pytest -sv text-generation-inference/tests/test_decode_jetstream_quant.py --runslow -k "slow and Llama-3-8B"

      - name: Run TGI Jetstream Pytorch - Quantization Llama 3 70B
        run: |
          python -m \
            pytest -sv text-generation-inference/tests/test_decode_jetstream_quant.py --runslow -k "slow and Llama-3-70B"

      # Everything else matching "jetstream" in the tests directory, excluding
      # the decode and quant tests already covered by the steps above.
      - name: Run TGI Jetstream Pytorch - Other tests
        run: |
          python -m \
            pytest -sv text-generation-inference/tests --runslow -k "jetstream and not decode and not quant"