# Optimum TPU / Test TGI on TPU (slow tests) / Jetstream Pytorch #56
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Nightly / on-demand workflow that runs the slow TGI tests for the
# Jetstream Pytorch backend on a TPU runner.
name: Optimum TPU / Test TGI on TPU (slow tests) / Jetstream Pytorch

on:
  schedule:
    - cron: '0 3 * * *'  # run at 3 AM UTC
  # This can be used to allow manually triggering nightlies from the web interface
  workflow_dispatch:

# Runs that resolve to the same group cancel one another. The group is the
# workflow name plus the PR head ref when one is set, otherwise the run id.
# NOTE(review): with only schedule/workflow_dispatch triggers, head_ref is
# presumably empty, so each run gets its own group - confirm this is intended.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
jobs:
  # Single job: build/install the Jetstream Pytorch TGI server, then run the
  # slow test suites one model family per step so failures are easy to locate.
  do-the-job:
    name: Build and Run slow tests
    runs-on:
      group: gcp-ct5lp-hightpu-8t
    container:
      image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:r2.4.0_3.10_tpuvm
      # Extra container flags come from the V5_LITEPOD_8_ENV repository
      # variable; the host's /mnt/hf_cache is bind-mounted at the same path.
      options: --shm-size "16gb" --ipc host --privileged ${{ vars.V5_LITEPOD_8_ENV}} -v /mnt/hf_cache:/mnt/hf_cache
    env:
      PJRT_DEVICE: TPU
      HF_TOKEN: ${{ secrets.HF_TOKEN_OPTIMUM_TPU_CI }}
      # Lives inside the bind-mounted /mnt/hf_cache directory.
      HF_HUB_CACHE: /mnt/hf_cache/cache_huggingface
      # Quoted so the value stays a string for every YAML consumer.
      # NOTE(review): presumably selects the Jetstream Pytorch backend - confirm.
      JETSTREAM_PT: "1"
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Runs the make targets, then pip-installs every text_generation_server
      # wheel found under text-generation-inference/.
      - name: Build and install Jetstream Pytorch TGI
        run: |
          make jetstream_requirements tgi_server test_installs
          find text-generation-inference/ -name "text_generation_server-*whl" -exec python -m pip install {} \;

      # Each step below narrows the run with a pytest -k expression.
      - name: Run TGI Jetstream Pytorch - Llama
        run: |
          python -m \
            pytest -sv text-generation-inference/tests/test_decode_jetstream.py --runslow -k "slow and Llama"

      - name: Run TGI Jetstream Pytorch - Gemma
        run: |
          python -m \
            pytest -sv text-generation-inference/tests/test_decode_jetstream.py --runslow -k "slow and gemma"

      - name: Run TGI Jetstream Pytorch - Mixtral greedy
        run: |
          python -m \
            pytest -sv text-generation-inference/tests/test_decode_jetstream.py --runslow -k "slow and Mixtral and greedy"

      - name: Run TGI Jetstream Pytorch - Quantization Mixtral
        run: |
          python -m \
            pytest -sv text-generation-inference/tests/test_decode_jetstream_quant.py --runslow -k "slow and Mixtral"

      - name: Run TGI Jetstream Pytorch - Quantization Llama-3 8B
        run: |
          python -m \
            pytest -sv text-generation-inference/tests/test_decode_jetstream_quant.py --runslow -k "slow and Llama-3-8B"

      - name: Run TGI Jetstream Pytorch - Quantization Llama 3 70B
        run: |
          python -m \
            pytest -sv text-generation-inference/tests/test_decode_jetstream_quant.py --runslow -k "slow and Llama-3-70B"

      # Everything under tests/ matching "jetstream" that the decode/quant
      # steps above did not already select.
      - name: Run TGI Jetstream Pytorch - Other tests
        run: |
          python -m \
            pytest -sv text-generation-inference/tests --runslow -k "jetstream and not decode and not quant"