forked from huggingface/text-generation-inference
-
Notifications
You must be signed in to change notification settings - Fork 47
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add some missing modification of 2.3.0 because of conflict
Signed-off-by: yuanwu <[email protected]>
- Loading branch information
1 parent
514a5a7
commit 14fdc4a
Showing
26 changed files
with
2,944 additions
and
1,092 deletions.
There are no files selected for viewing
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# Runs on every pull request: installs the launcher and the v3 router from
# source, then runs `update_doc.py --check`.
name: Automatic Documentation for Launcher

on:
  pull_request:

jobs:
  update_docs:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v2

      # Stable Rust toolchain, needed by the two `cargo install` steps below.
      - name: Set up Rust
        uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          profile: minimal

      - name: Install Protocol Buffers compiler
        run: |
          sudo apt-get update
          sudo apt-get install -y protobuf-compiler libprotobuf-dev

      - id: install-launcher
        name: Install Launcher
        run: cargo install --path launcher/

      - id: install-router
        name: Install router
        run: cargo install --path backends/v3/

      # Node is required for the globally installed @redocly/cli used below.
      - uses: actions/setup-node@v4
        with:
          node-version: 22

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.x'

      - name: Check that documentation is up-to-date
        run: |
          npm install -g @redocly/cli
          python update_doc.py --check
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
# Reusable workflow (`workflow_call`): builds the Docker image for one
# hardware target, pushes it to the configured registries, then runs the
# integration tests against the pushed image.
name: Build and push docker image to internal registry

on:
  workflow_call:
    inputs:
      hardware:
        type: string
        description: Hardware
        # Accepted values (must match the `case` arms in the
        # "Construct hardware variables" step):
        #   - cuda
        #   - rocm
        #   - intel-xpu
        #   - intel-cpu
        required: true
      release-tests:
        description: "Run release integration tests"
        required: true
        default: false
        type: boolean

jobs:
  build-and-push:
    # Values computed in the "Final" step and consumed by `integration_tests`.
    outputs:
      docker_image: ${{ steps.final.outputs.docker_image }}
      docker_devices: ${{ steps.final.outputs.docker_devices }}
      runs_on: ${{ steps.final.outputs.runs_on }}
      label: ${{ steps.final.outputs.label }}
    concurrency:
      group: ${{ github.workflow }}-build-and-push-image-${{ inputs.hardware }}-${{ github.head_ref || github.run_id }}
      cancel-in-progress: true
    runs-on:
      group: aws-highmemory-32-plus-priv
    permissions:
      contents: write
      packages: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      # Provides the GITHUB_SHA_SHORT variable used in the image tags below.
      # NOTE(review): the action reference was garbled in the extracted page;
      # restored as github-slug-action@v4.4.1 - confirm the pinned version.
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4.4.1
      - name: Construct hardware variables
        shell: bash
        env:
          # Passed through the environment rather than interpolated into the
          # script, so the input value is never parsed as shell syntax.
          HARDWARE: ${{ inputs.hardware }}
        run: |
          case "${HARDWARE}" in
            cuda)
              export dockerfile="Dockerfile"
              export label_extension=""
              export docker_devices=""
              export runs_on="aws-g6-12xl-plus-priv-cache"
              export platform=""
              ;;
            rocm)
              export dockerfile="Dockerfile_amd"
              export label_extension="-rocm"
              export docker_devices="/dev/kfd,/dev/dri"
              # TODO Re-enable when they pass.
              # export runs_on="amd-gpu-tgi"
              export runs_on="ubuntu-latest"
              export platform=""
              ;;
            intel-xpu)
              export dockerfile="Dockerfile_intel"
              export label_extension="-intel-xpu"
              export docker_devices=""
              export runs_on="ubuntu-latest"
              export platform="xpu"
              ;;
            intel-cpu)
              export dockerfile="Dockerfile_intel"
              export label_extension="-intel-cpu"
              export docker_devices=""
              export runs_on="ubuntu-latest"
              export platform="cpu"
              ;;
            *)
              # Fail fast: without this arm an unknown value would leave every
              # variable empty and the build would run with no Dockerfile set.
              echo "Unsupported hardware: '${HARDWARE}'" >&2
              exit 1
              ;;
          esac
          echo $dockerfile
          echo "Dockerfile=${dockerfile}"
          echo $label_extension
          echo $docker_devices
          echo $runs_on
          echo $platform
          echo "DOCKERFILE=${dockerfile}" >> $GITHUB_ENV
          echo "LABEL=${label_extension}" >> $GITHUB_ENV
          echo "PLATFORM=${platform}" >> $GITHUB_ENV
          echo "DOCKER_DEVICES=${docker_devices}" >> $GITHUB_ENV
          echo "RUNS_ON=${runs_on}" >> $GITHUB_ENV
          echo REGISTRY_MIRROR=$REGISTRY_MIRROR >> $GITHUB_ENV
      - name: Initialize Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          install: true
          buildkitd-config: /tmp/buildkitd.toml
      - name: Login to internal Container Registry
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.REGISTRY_USERNAME }}
          password: ${{ secrets.REGISTRY_PASSWORD }}
          registry: registry.internal.huggingface.tech
      - name: Login to GitHub Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Login to Azure Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.AZURE_DOCKER_USERNAME }}
          password: ${{ secrets.AZURE_DOCKER_PASSWORD }}
          registry: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io
      # If pull request
      - name: Extract metadata (tags, labels) for Docker
        if: ${{ github.event_name == 'pull_request' }}
        id: meta-pr
        uses: docker/metadata-action@v5
        with:
          images: |
            registry.internal.huggingface.tech/api-inference/community/text-generation-inference
          tags: |
            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
      # If main, release or tag
      # NOTE(review): the action reference was garbled in the extracted page;
      # restored as metadata-action@v4.3.0 - confirm the pinned version.
      - name: Extract metadata (tags, labels) for Docker
        if: ${{ github.event_name != 'pull_request' }}
        id: meta
        uses: docker/metadata-action@v4.3.0
        with:
          flavor: |
            latest=auto
          images: |
            registry.internal.huggingface.tech/api-inference/community/text-generation-inference
            ghcr.io/huggingface/text-generation-inference
            db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference
          tags: |
            type=semver,pattern={{version}}${{ env.LABEL }}
            type=semver,pattern={{major}}.{{minor}}${{ env.LABEL }}
            type=raw,value=latest${{ env.LABEL }},enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v4
        with:
          context: .
          file: ${{ env.DOCKERFILE }}
          push: true
          platforms: 'linux/amd64'
          build-args: |
            GIT_SHA=${{ env.GITHUB_SHA }}
            DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
            PLATFORM=${{ env.PLATFORM }}
          # Only one of the two metadata steps runs, so take whichever is set.
          tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
          cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }}
          cache-to: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }}
      # Publishes the job outputs declared at the top of this job.
      - name: Final
        id: final
        run: |
          echo "docker_image=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT}}${{ env.LABEL }}" >> "$GITHUB_OUTPUT"
          echo "docker_devices=${{ env.DOCKER_DEVICES }}" >> "$GITHUB_OUTPUT"
          echo "runs_on=${{ env.RUNS_ON }}" >> "$GITHUB_OUTPUT"
          echo "label=${{ env.LABEL }}" >> "$GITHUB_OUTPUT"
  integration_tests:
    concurrency:
      group: ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label }}-${{ github.head_ref || github.run_id }}
      cancel-in-progress: true
    needs: build-and-push
    runs-on:
      group: ${{ needs.build-and-push.outputs.runs_on }}
    # Targets whose runner resolved to 'ubuntu-latest' are skipped.
    if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
    env:
      # NOTE(review): both arms of this expression yield '--release', so the
      # flag is always passed regardless of ref or `release-tests` - confirm
      # that this is intended.
      PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || inputs.release-tests == true) && '--release' || '--release' }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      # NOTE(review): action reference restored from a garbled extraction -
      # confirm the pinned version.
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4.4.1
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: Install
        run: |
          make install-integration-tests
      - name: Run tests
        env:
          # Supplied through the step environment so the secret is not
          # written into the generated shell script.
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          export DOCKER_VOLUME=/mnt/cache
          export DOCKER_IMAGE=${{ needs.build-and-push.outputs.docker_image }}
          export DOCKER_DEVICES=${{ needs.build-and-push.outputs.docker_devices }}
          echo $DOCKER_IMAGE
          pytest -s -vv integration-tests ${PYTEST_FLAGS}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# On pushes that touch docs/source/** on the listed branches, delegates to
# the doc-builder reusable workflow `build_main_documentation.yml`.
name: Build documentation

on:
  push:
    branches:
      - main
      - doc-builder*
      - v*-release
    paths:
      - "docs/source/**"

jobs:
  build:
    uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
    secrets:
      hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
    with:
      package: text-generation-inference
      commit_sha: ${{ github.sha }}
      additional_args: --not_python_module
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# On pull requests that touch docs/source/**, delegates to the doc-builder
# reusable workflow `build_pr_documentation.yml`.
name: Build PR Documentation

on:
  pull_request:
    paths:
      - "docs/source/**"

# A newer run for the same PR head ref cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

jobs:
  build:
    uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
    with:
      package: text-generation-inference
      pr_number: ${{ github.event.number }}
      commit_sha: ${{ github.event.pull_request.head.sha }}
      additional_args: --not_python_module
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Runs the Python client test suite on pull requests that touch the client
# sources or this workflow file.
name: Python Client Tests

on:
  pull_request:
    paths:
      - ".github/workflows/client-tests.yaml"
      - "clients/python/**"

jobs:
  run_tests:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v1
        with:
          # Quoted so YAML keeps the version as a string, not a float.
          python-version: "3.9"
      - name: Install
        run: |
          cd clients/python && pip install .
      - name: Run tests
        env:
          # Supplied through the step environment so the secret is not
          # written into the generated shell script.
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          pip install pytest pytest-asyncio
          make python-client-tests
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# Starts a starcoder TGI container and runs the k6 load test against it.
# Triggered on a weekday schedule and by pull requests to main that modify
# this workflow file.
name: Nightly load test

on:
  schedule:
    - cron: '0 0 * * 1-5'

  pull_request:
    paths:
      - ".github/workflows/load_test.yaml"
    branches:
      - 'main'

jobs:
  load-tests:
    concurrency:
      group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
      cancel-in-progress: true
    runs-on:
      group: aws-g5-12xlarge
    env:
      # NOTE(review): no step in this workflow references DOCKER_VOLUME, and
      # the container below mounts /mnt/cache instead - confirm it is needed.
      DOCKER_VOLUME: /cache
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Install k6
        run: |
          curl https://github.com/grafana/k6/releases/download/v0.44.0/k6-v0.44.0-linux-amd64.tar.gz -L | tar xvz --strip-components 1

      - name: Start starcoder
        env:
          # Supplied through the step environment and forwarded to the
          # container with a bare `-e HF_TOKEN`, so the secret is not written
          # into the generated script or the docker command line.
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          docker run --name tgi-starcoder --rm --gpus all -p 3000:80 \
            -v /mnt/cache:/data -e HF_TOKEN --pull always -d \
            ghcr.io/huggingface/text-generation-inference:latest \
            --model-id bigcode/starcoder --num-shard 2 --max-batch-total-tokens 32768
          sleep 10
          # Polls the health endpoint until the server answers (up to 240 tries).
          wget --timeout 10 --retry-on-http-error --waitretry=1 --tries=240 http://localhost:3000/health

      - name: Run k6
        run: |
          ./k6 run load_tests/starcoder_load.js

      # Always stop the container, even when an earlier step failed.
      - name: Stop starcoder
        if: ${{ always() }}
        run: |
          docker stop tgi-starcoder || true
Oops, something went wrong.