Skip to content

Commit

Permalink
Add tei and tgi chart (#8)
Browse files Browse the repository at this point in the history
Signed-off-by: Sanket Sudake <[email protected]>
  • Loading branch information
sanketsudake authored Jul 15, 2024
1 parent ff32720 commit a4eabb8
Show file tree
Hide file tree
Showing 23 changed files with 513 additions and 53 deletions.
File renamed without changes.
24 changes: 24 additions & 0 deletions charts/text-embeddings-inference/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
apiVersion: v2
name: text-embeddings-inference
description: >-
  A Helm chart for deploying Text Embeddings Inference
  from Hugging Face

# Chart type: either 'application' or 'library'.
#
# An application chart bundles templates that can be packaged into versioned
# archives and deployed.
#
# A library chart only supplies utilities or functions for chart developers.
# It is pulled in as a dependency of an application chart so those helpers are
# available in the rendering pipeline; it defines no templates of its own and
# therefore cannot be deployed.
type: application

# Version of the chart itself. Bump it on every change to the chart or its
# templates, including changes to the app version.
# Expected to follow Semantic Versioning (https://semver.org/).
version: "0.1.0"

# Version of the application being deployed. Bump it whenever the application
# changes. It does not have to follow Semantic Versioning; it should mirror
# the version the application uses. Keeping it quoted is recommended.
appVersion: "1.5.0"
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@
{{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "tgi.fullname" . }})
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "text-embeddings-inference.fullname" . }})
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "tgi.fullname" . }}'
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "tgi.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "text-embeddings-inference.fullname" . }}'
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "text-embeddings-inference.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "tgi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "text-embeddings-inference.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "tgi.name" -}}
{{- define "text-embeddings-inference.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

Expand All @@ -10,7 +10,7 @@ Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "tgi.fullname" -}}
{{- define "text-embeddings-inference.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
Expand All @@ -26,16 +26,16 @@ If release name contains chart name it will be used as a full name.
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "tgi.chart" -}}
{{- define "text-embeddings-inference.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "tgi.labels" -}}
helm.sh/chart: {{ include "tgi.chart" . }}
{{ include "tgi.selectorLabels" . }}
{{- define "text-embeddings-inference.labels" -}}
helm.sh/chart: {{ include "text-embeddings-inference.chart" . }}
{{ include "text-embeddings-inference.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
Expand All @@ -45,17 +45,17 @@ app.kubernetes.io/managed-by: {{ .Release.Service }}
{{/*
Selector labels
*/}}
{{- define "tgi.selectorLabels" -}}
app.kubernetes.io/name: {{ include "tgi.name" . }}
{{- define "text-embeddings-inference.selectorLabels" -}}
app.kubernetes.io/name: {{ include "text-embeddings-inference.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use
*/}}
{{- define "tgi.serviceAccountName" -}}
{{- define "text-embeddings-inference.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "tgi.fullname" .) .Values.serviceAccount.name }}
{{- default (include "text-embeddings-inference.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
apiVersion: apps/v1
kind: {{ .Values.kind }}
metadata:
name: {{ include "tgi.fullname" . }}
name: {{ include "text-embeddings-inference.fullname" . }}
labels:
{{- include "tgi.labels" . | nindent 4 }}
{{- include "text-embeddings-inference.labels" . | nindent 4 }}
spec:
{{- if not .Values.autoscaling.enabled }}
replicas: {{ .Values.replicaCount }}
{{- end }}
selector:
matchLabels:
{{- include "tgi.selectorLabels" . | nindent 6 }}
{{- include "text-embeddings-inference.selectorLabels" . | nindent 6 }}
template:
metadata:
{{- with .Values.podAnnotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "tgi.labels" . | nindent 8 }}
{{- include "text-embeddings-inference.labels" . | nindent 8 }}
{{- with .Values.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
Expand All @@ -27,7 +27,7 @@ spec:
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
serviceAccountName: {{ include "tgi.serviceAccountName" . }}
serviceAccountName: {{ include "text-embeddings-inference.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
containers:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: {{ include "tgi.fullname" . }}
name: {{ include "text-embeddings-inference.fullname" . }}
labels:
{{- include "tgi.labels" . | nindent 4 }}
{{- include "text-embeddings-inference.labels" . | nindent 4 }}
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: {{ include "tgi.fullname" . }}
name: {{ include "text-embeddings-inference.fullname" . }}
minReplicas: {{ .Values.autoscaling.minReplicas }}
maxReplicas: {{ .Values.autoscaling.maxReplicas }}
metrics:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{{- if .Values.ingress.enabled -}}
{{- $fullName := include "tgi.fullname" . -}}
{{- $fullName := include "text-embeddings-inference.fullname" . -}}
{{- $svcPort := .Values.service.port -}}
{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
{{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
Expand All @@ -17,7 +17,7 @@ kind: Ingress
metadata:
name: {{ $fullName }}
labels:
{{- include "tgi.labels" . | nindent 4 }}
{{- include "text-embeddings-inference.labels" . | nindent 4 }}
{{- with .Values.ingress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
apiVersion: v1
kind: Service
metadata:
name: {{ include "tgi.fullname" . }}
name: {{ include "text-embeddings-inference.fullname" . }}
labels:
{{- include "tgi.labels" . | nindent 4 }}
{{- include "text-embeddings-inference.labels" . | nindent 4 }}
spec:
type: {{ .Values.service.type }}
ports:
Expand All @@ -12,4 +12,4 @@ spec:
protocol: TCP
name: http
selector:
{{- include "tgi.selectorLabels" . | nindent 4 }}
{{- include "text-embeddings-inference.selectorLabels" . | nindent 4 }}
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ include "tgi.serviceAccountName" . }}
name: {{ include "text-embeddings-inference.serviceAccountName" . }}
labels:
{{- include "tgi.labels" . | nindent 4 }}
{{- include "text-embeddings-inference.labels" . | nindent 4 }}
{{- with .Values.serviceAccount.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{{- /*
Helm test hook ("helm.sh/hook": test): a one-shot busybox Pod that runs wget
against the chart's Service.

The request targets /health, the same HTTP GET path the chart's default
liveness/readiness probes use (values.yaml), instead of the service root.
restartPolicy is Never so a failed fetch is reported as a failed test rather
than being retried.
*/ -}}
apiVersion: v1
kind: Pod
metadata:
  name: "{{ include "text-embeddings-inference.fullname" . }}-test-connection"
  labels:
    {{- include "text-embeddings-inference.labels" . | nindent 4 }}
  annotations:
    "helm.sh/hook": test
spec:
  containers:
    - name: wget
      image: busybox
      command: ['wget']
      # <service name>:<service port>/health
      args: ['{{ include "text-embeddings-inference.fullname" . }}:{{ .Values.service.port }}/health']
  restartPolicy: Never
124 changes: 124 additions & 0 deletions charts/text-embeddings-inference/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# Default values for text-embeddings-inference.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Number of pod replicas. The workload template only sets `replicas` from this
# value when autoscaling.enabled is false.
replicaCount: 1

# Kubernetes workload kind written into the workload template's `kind` field.
# NOTE(review): the HorizontalPodAutoscaler template targets kind "Deployment",
# so autoscaling with a different kind here looks unsupported - confirm.
kind: Deployment

image:
  repository: ghcr.io/huggingface/text-embeddings-inference
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: ""

# Container command (a single-element list naming the launcher binary).
command: ["text-embeddings-launcher"]

# Environment variables, each entry a name/value pair.
# Values are quoted so numeric-looking settings stay strings.
env:
  # - name: MY_ENV_VAR
  #   value: my-env-var-value
  # Reference: https://huggingface.co/docs/text-embeddings-inference/cli_arguments
  - name: MODEL_ID
    value: BAAI/bge-large-en-v1.5
  - name: MAX_CLIENT_BATCH_SIZE
    value: "1024"
  - name: RUST_BACKTRACE
    value: "full"

# Rendered under the pod spec's imagePullSecrets when non-empty.
imagePullSecrets: []
# Replaces the chart name in the "name" helper (truncated to 63 characters).
nameOverride: ""
# When set, used as the full resource name instead of the generated one
# (truncated to 63 characters).
fullnameOverride: ""

serviceAccount:
  # Specifies whether a service account should be created
  create: true
  # Automatically mount a ServiceAccount's API credentials?
  automount: true
  # Annotations to add to the service account
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template.
  # If not set and create is false, the "default" service account is used.
  name: ""

# Extra annotations added to the pod template metadata.
podAnnotations: {}
# Extra labels appended to the pod template labels after the chart's own labels.
podLabels: {}

# Rendered as the pod-level securityContext.
podSecurityContext: {}
  # fsGroup: 2000

# NOTE(review): presumably the container-level securityContext; the consuming
# template lines are not visible here - confirm.
securityContext: {}
  # capabilities:
  #   drop:
  #     - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000

# Service exposing the workload. NOTES.txt prints different access
# instructions for NodePort, LoadBalancer and ClusterIP; `port` is also the
# port used by the Ingress backend and the Helm connection test.
service:
  type: ClusterIP
  port: 80

# Ingress is only rendered when `enabled` is true.
ingress:
  enabled: false
  className: ""
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local

# Container resource requests/limits; empty by default.
resources: {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # The example also shows how a GPU (nvidia.com/gpu) would be requested.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  #   nvidia.com/gpu: 1
  # requests:
  #   cpu: 100m
  #   memory: 128Mi
  #   nvidia.com/gpu: 1

# Both probes issue an HTTP GET for /health on the port named "http".
# NOTE(review): assumes the container port is named "http" in the workload
# template - confirm, that part of the template is not visible here.
livenessProbe:
  httpGet:
    path: /health
    port: http
readinessProbe:
  httpGet:
    path: /health
    port: http

# HorizontalPodAutoscaler settings. When `enabled` is true the workload
# template omits `replicas`; minReplicas/maxReplicas feed the HPA spec.
autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  targetCPUUtilizationPercentage: 80
  # targetMemoryUtilizationPercentage: 80

# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
#   secret:
#     secretName: mysecret
#     optional: false

# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
#   mountPath: "/etc/foo"
#   readOnly: true

# NOTE(review): the three keys below are presumably passed through to the pod
# spec for scheduling; the consuming template lines are not visible - confirm.
nodeSelector: {}

tolerations: []

affinity: {}
23 changes: 23 additions & 0 deletions charts/text-generation-inference/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Files and directories left out when this chart is packaged.
# One pattern per line. Patterns may be shell globs or relative paths;
# prefixing a pattern with ! negates it.
# IDE and editor project files
.idea/
.project
.vscode/
*.tmproj
# Version-control metadata
.bzr/
.bzrignore
.git/
.gitignore
.hg/
.hgignore
.svn/
# Backup and temporary files
*~
*.bak
*.orig
*.swp
*.tmp
# Desktop metadata
.DS_Store
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
apiVersion: v2
name: tgi
description: Deployment of Text Generation Inference from Hugging Face
name: text-generation-inference
description: A Helm chart for deploying Text Generation Inference from Hugging Face

# A chart can be either an 'application' or a 'library' chart.
#
Expand Down
Loading

0 comments on commit a4eabb8

Please sign in to comment.