-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathnomic-embed-raw.yaml
81 lines (81 loc) · 1.95 KB
/
nomic-embed-raw.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
---
# ServingRuntime: HuggingFace Text Embeddings Inference (TEI) on GPU,
# registered with the OpenDataHub/OpenShift AI dashboard as "TEI-GPU".
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  annotations:
    opendatahub.io/accelerator-name: medium-gpu-card
    opendatahub.io/apiProtocol: REST
    openshift.io/display-name: "TEI-GPU"
    opendatahub.io/template-display-name: TEI-GPU
    opendatahub.io/template-name: tei-gpu
  name: tei-gpu
  labels:
    # String "true" (not boolean) — the dashboard label selector expects a string.
    opendatahub.io/dashboard: 'true'
spec:
  builtInAdapter:
    # Allow up to 90 s for the embedding model to load before marking the pod unready.
    modelLoadingTimeoutMillis: 90000
  containers:
    - args:
        # KServe mounts the model from storage at /mnt/models/.
        - "--model-id=/mnt/models/"
        - "--port=8080"
      # "turing-1.5" image variant targets Turing-generation GPUs (e.g. Tesla T4).
      image: 'ghcr.io/huggingface/text-embeddings-inference:turing-1.5'
      name: kserve-container
      env:
        # Redirect all HuggingFace caches to /tmp so the container can run
        # with a read-only root filesystem / arbitrary UID (OpenShift default).
        - name: HF_HOME
          value: /tmp/hf_home
        - name: HUGGINGFACE_HUB_CACHE
          value: /tmp/hf_hub_cache
        # Fixed typo: was TRANSFORMER_CACHE, which the transformers library
        # ignores; the correct variable is TRANSFORMERS_CACHE.
        - name: TRANSFORMERS_CACHE
          value: /tmp/transformers_cache
      ports:
        - containerPort: 8080
          name: http1
          protocol: TCP
      volumeMounts:
        # TEI uses shared memory for tokenizer/inference workers; the default
        # 64Mi /dev/shm is too small, so mount a dedicated volume.
        - mountPath: /dev/shm
          name: shm
  # One model per server instance (no ModelMesh multi-model serving).
  multiModel: false
  supportedModelFormats:
    - autoSelect: true
      name: sbert
  volumes:
    - emptyDir:
        medium: Memory
        sizeLimit: 2Gi
      name: shm
---
# InferenceService: deploys nomic-embed-text-v1.5 on the "tei-gpu" runtime
# using KServe RawDeployment mode (plain Deployment/Service, no Knative).
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  annotations:
    openshift.io/display-name: Nomic-embed-text-v1.5
    serving.kserve.io/deploymentMode: RawDeployment
  name: nomic-embed-text-v1-5
  labels:
    # String "true" (not boolean) — the dashboard label selector expects a string.
    opendatahub.io/dashboard: 'true'
spec:
  predictor:
    # Pin to exactly one replica (single GPU workload, no autoscaling).
    maxReplicas: 1
    minReplicas: 1
    model:
      modelFormat:
        # Must match a supportedModelFormats entry on the ServingRuntime.
        name: sbert
      name: ''
      resources:
        limits:
          cpu: '2'
          memory: 8Gi
          nvidia.com/gpu: '1'
        requests:
          cpu: '1'
          memory: 4Gi
          nvidia.com/gpu: '1'
      # References the ServingRuntime defined above by metadata.name.
      runtime: tei-gpu
      storage:
        # Data-connection secret holding S3 credentials for the model bucket.
        key: aws-connection-models
        path: nomic-ai/nomic-embed-text-v1.5/
    tolerations:
      # Allow scheduling onto GPU nodes tainted nvidia.com/gpu=Tesla-T4-SHARED.
      - effect: NoSchedule
        key: nvidia.com/gpu
        operator: Equal
        value: 'Tesla-T4-SHARED'