From 62a897c843e70ebbe9d62b644d218d00d98cf37d Mon Sep 17 00:00:00 2001 From: Sanket Sudake Date: Thu, 19 Sep 2024 18:55:36 +0530 Subject: [PATCH] Add vllm chart in ai-stack (#18) Signed-off-by: Sanket --- charts/ai-stack/Chart.lock | 7 ++-- charts/ai-stack/Chart.yaml | 8 ++++- charts/ai-stack/README.md | 1 + charts/ai-stack/charts/vllm-0.1.0.tgz | Bin 0 -> 5296 bytes charts/ai-stack/values.yaml | 46 ++++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 charts/ai-stack/charts/vllm-0.1.0.tgz diff --git a/charts/ai-stack/Chart.lock b/charts/ai-stack/Chart.lock index fa59f6a..2fa2ecf 100644 --- a/charts/ai-stack/Chart.lock +++ b/charts/ai-stack/Chart.lock @@ -11,5 +11,8 @@ dependencies: - name: chromadb repository: https://infracloudio.github.io/charts version: 0.1.3 -digest: sha256:0febd220a71c6533c04a53affcfbeca2a77261acba6ded41f424cc34c2a056ff -generated: "2024-08-19T19:46:03.544448+05:30" +- name: vllm + repository: https://infracloudio.github.io/charts + version: 0.1.0 +digest: sha256:14b5e60e54b3618e5d950841fee42743eb9d50d2fed44d8d46484c97adbffde6 +generated: "2024-09-19T18:46:10.274968+05:30" diff --git a/charts/ai-stack/Chart.yaml b/charts/ai-stack/Chart.yaml index f98cab6..68b4e04 100644 --- a/charts/ai-stack/Chart.yaml +++ b/charts/ai-stack/Chart.yaml @@ -16,7 +16,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.3.8 +version: 0.4.0 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to @@ -49,6 +49,12 @@ dependencies: alias: vectordb condition: vectordb.enabled + - name: vllm + version: 0.1.0 + repository: "https://infracloudio.github.io/charts" + alias: vllm + condition: vllm.enabled + keywords: - ai-stack - ai-services diff --git a/charts/ai-stack/README.md b/charts/ai-stack/README.md index 0668837..9169a89 100644 --- a/charts/ai-stack/README.md +++ b/charts/ai-stack/README.md @@ -13,6 +13,7 @@ The AI stack consists of the following components: - [Text Generation Inference(TGI)](../text-generation-inference/) - [Grafana Dashboards](../infracloud-dashboards) - [ChromaDB](../chromadb) +- [vLLM](../vllm) ## Setup Helm Repository diff --git a/charts/ai-stack/charts/vllm-0.1.0.tgz b/charts/ai-stack/charts/vllm-0.1.0.tgz new file mode 100644 index 0000000000000000000000000000000000000000..13b09cd9568d2cd37e2404cd888c054118d705a7 GIT binary patch literal 5296 zcmV;h6i@3PiwG0|00000|0w_~VMtOiV@ORlOnEsqVl!4SWK%V1T2nbTPgYhoO;>Dc zVQyr3R8em|NM&qo0PKD1a@#nv@ciae^e1~OmyIaN*DUMHp7PqB8CQHO+sW3})RYa8 zEeT@~-~ym*jmPKNPXXXvl;unkXLFA6!x70wccam0G#U+L%2?dnoe-)08Hw2sU!(N< z{r=X*hW)qS?>GPLuW$B$7;J29ulLsmTZ8Q%`h&H>X8#B1e-&;TB~_ZpANrq7tJ%2k zq>zMuM5(CY0Zi9Ck|gD0-{0~Fej=1!^CDECq=~j)cHn1ZF@#2KU?e0&m@qM;dBh&|I}i~|CF#4 z)mJkBOUD29_WE{n{BNvnJ&*sVC@ZjsBa$)=P6_OAHcH`pD{wKP3aA1C|Jpe^?2d$t ziPjjwh%z()dl)hz5vD{^GGwSg3m76KL@A7baxGvcQVAO4gb|I(_dJPqli`Cx zcTIT|z@D`sF>>vBN=u?~JPY9N-UC&L{*De<#4+KKk*7?# zmpumX%dhjWM2=G4uiMIl0I6{d$#GtUJkqDJ6AQ-a^Vq_`W+J=0p(1%^ZjW!TCI@#Mlygb+gqJU9l zR7i06|2Zp@kspeT^522cYlN$DqSvlKNVqylknbo(l_p&G1W`(N)kt>--DIXGlynDc z!z7OP3aNSB z8!{o{?&elM?s!(wE-la{Lmoh9WAL}Nj^`ntI=Otlygv<#LOG>;+(SOSoDx~!=z^P6 zfBy0E;^h7Qu?GNdr2i;I9N0$yg`q_K9%pAb%HIK8d~ zVyt^7m&Yd;mltO{yYHJ&*`Pm789TQvb_#7O<^$Od#!<@H+)K}jIK-VWw1&%e(0PI( z9Z^(pGeJE;2?R1YAPx}J#H!g4A(TioMjm9p0?;x=i)bg+A|{%Kgs~aKwjzP^8p#(5 zc1{mW-N@)SVd}p;0Agx7Q(9u0*BHa^*4-PeioS;4Tr-AsHfbPv-F) z!&Fi|+Z9~ntuERyQa?$NCIPJV`+d`U?O-8E$dEFssqr@5ED}kPF->a{TObYq+d!H-a`sPK-)T$$&wkbD%u{v{j-2MuaJ>Xzk3RG168E2_4b}Xy$bo zD9^geR<`}sL`}IyIVB8;hMNfuClI5SG*lIIDwHI#Ln4O`V7;&Aa5*^8G)9pcwRJBp z+_RCatCJ8tG3uGY8)qgXYGh0F#MqbO#fljk)JTyqU{z4fSDU^TLCpwT@qT{M&S(piBMWOE8eva zf%81NcuV4hq0dA}SZ2$ONK|R^ySa>tHxA^&>gj>0Zl+MneP;VYqsGixQafb2*>vKX zTqE7pOl9M$WOF4a-NvPtt6*uI{)v#P)Y?gj#=}7#SrcTsJyV(7^D1gX=1VaoQ={s} zsWpCWD7Xr#NF7_@L>Mz?w8nIPs!;mQ2_{SlpxkJ`LSC5(&5=yw;LRwUoQoYG*nHW8#MLvBl0PHy|YXEmih= zYzS}+Q7Iy4Z*2P|#$uXTk5sZ53?;F$8EKU8q7?NDKSLVRvX8>7P+!Lz3SJ4N-mm^?#wwtsb{eU?%j`_?a?MzrG&It@m`GF;!6MhX5uq%VSb4kd znVEuq2b^wrp79=3NX){h)p*5}pSk6la#&(=TY1s%+ck_FquxFJ@Ig~Xf49cr6lG`( zg;|`v?YmW&1v|2Ry%^G4ft}JOPX$Y3RAz7)tre+GQe9aont^WRWmZ!eu(d=hg+n+J z!doeP+`wC@c)ZR;#WVMmeG-Wyo(rLkqa))6eG*%)#sc1~|8{ECqSHei_C`W z$ovV9iwMt=VQ5?eJN0P6j9cZV6SF)cBTL(@otLiv?TWWpo%fDUF80rTeXGA@jA2Rq zXMMfD(TM-7Z*6Wo$A6xp41D+r-HcL+p2CN-Lm1A?#KtpUT@~R36?k`dU7#cIKf1tS z?yh9!1McrFgd}{7uxe&{2;R*>TMxj%0+i6c^+?=XV-WA|458I#UCj)Py1N52``_OO zz24oOBW3Zz{r%mYLsn97pkDJQQ)p;}f@?y#g3hss@Ki|MDOk>Yao_ms4jJIMtuxjcSZ7TS^n1DxXA= z=?Ap$vWNSQWgwgg+jBv$x0&qJI!(yej#S*a5(pr~nQ$#AN1zHv~p_ zW2UN9X8}y3&Q+9faGFIXi{PMNadF{aB88A}xFI^60M!ayJF8>h#V7Z#nugH5S=MVW z%3eF)Kl^yF+tP6s(_=_3p#|M;ZtJ0Qch|YQ%ZA-c$|Jmmyn9(icKMit{wq7Gd^@z< z0P5Vgbb8r}!0_4VOWV+eL$6p0KVs~ip6p#7?;PznWR-~Mu{zG6Lz2X=FR4t$cXM(q zW%y>k3gt>0S-oqLG9p?ylw#B*BAQs-nH*iqc2ABkb`FmB&)Q5&BBIANZ#5TCtD(tc zj?(f{n4RLavpq%Uc0N)?b>7E=wQaxe_x(Zew*R)D#e-a=zcP!M)o9N7qc}EPbVowo z5E)h2H#CCPx=^ZC{9DhV-*ajBf0q+v2}F&mBiY<+<;d@>F|eH$nX%h*6mLkpQob{ zy@gJ<)9}qa%i6AsCzFDLnb2nN@02h)q8J%_W1057f1<-};kBV)8lYf^Au;Aji5TI1 zVYppWM-*A4fJk)V4$MM-RKd%ki#_fgpBq@?0~pKs%J&XNAf5ekNQ(K(ZSlE?w}u`_gLjYb&63AL1P)1a z7Ib{pUVb8TiKJYQpz{~i{fp`}a5hMrh_aWtFm@CFU+9ISFAy?O}`}G;6Y|k-Oh(KEj4gfOcs9t zr#wQgVZ&53o>q{>?WjvJ$hoQPSyUsAHj^vcn5ghx%{FsjMRHlelNSFXF)IyC2GHcG z7DWWeX9I^PCOVuPRvO#Z-1#}OB(deN=3_C#T9J5;((Wwgyw-ChiXd5Sm$`^^Rohyj z{I;xCYoIBt)}qIDVFhD5&rSeMsE2u*<1W{cRwB(*pAyep642+!XMPrK85C_(tzX4a z&rNCNr<7eIUY|h~;6FTesHT<4D0kZ~Eq+wRDwgS%Si5-)2tm*>oLbscz zyFI`OAJMVzQY&i+4T2gcive31$`~NGj)^X_iF`{LspdXDilJJCiW~n31{*9G5w?Le zw9cL>vh2-;#F0-QLEAumGHDxsZR$prX6e!bs-+gBttn$O%3iORNTEe2SO6Eh zrcot?lG5q`ibzt?L&M zuh7kt8a`)^nxj(9A}GOXxdmU8UY?RraZmCjg)d5{k_%BnRTCus9JO!lqkqlPSpQ8D z^5u^KEm{8!w%4}i@;|mVpVxm+QJV8>``W6uN-RNo)3t1|__L7ocQY%oQxWZC5GcRJ ziqTTOu*qh{afY^4u6aLq@(AGZeAaT~DsymEh838sQZ;{WQKb?ltK>44DhDfbk}+03E^PASIB(1VOC|9@vf-Z; zhD%j_DtSDD|IdwnHtc_%U-d=jfF<^SqrX0w+yC9%ezyNlQI=%#sbA z@!zLI%nw)oM0NJ_)vGdimxU5vPKbJsvpKqzs2Ow~IFiw+pj^E)tyHN0JhsK{Uy{2> zN6;pkzdcqo*$!4YIdarDLS7pabp6)t$qj0$T5aSuz9_jNYEq%utb(^1x%sK;uudp$ zLO*a~Z1+A&RsI}rqb=gt((M*n^rpCf8t5wM#cE!b>+5${1VcRnQ-&87}TYZh%gG?DNZ9|Q@ z{2Z2{WV4g!ShRU{1-p$?S&QYHqgPH(K&PpZEWtq}1+z%D4WrOsQ`Xz0t)K zvM~L>vRBg0j!rJ!0dec;)9N`(Sg;{d+jXh$@qgNe{>JxUM%%}KmO%D}%)rv|zumn5 zX=||l9RGci@~CXT<{GSNC^GQJ*oy~JA->#7tkH@uxF9QTTdBAFLR=$nGeZRndA^3r z*^TJd)o?zl&|s5S4w2QbrI{a*3+mL?M1R^6Ew)8D+2;K*)q0C9P-{@=>*OCIj}k$7 z)5EGbP328Ji=|uK{ZmF<>KFDbu37^u>z5&p_GO_p6&fj46_Q$ zo##R+&*%Hro|G(`vFH$GF}s*C`^on|XjIz$?S_JL4DB5jpE(UI_5Zfk8uovCef#70w!$qy$@gt+bmV2Bl_u$k?e=g!@XQRi06rJoK{ zH5-cC>}7ea<_iP*A~yorWtT6OwD^JZ?G}|pqg*;qMLotT%5U%T)ruYezL6~*b0P6- z&{#VD*EgH@|E_PXJ&*sVC@XMEv_|P}igY>vHxuMAOeu>fA45XIYcfVv+z71F#KvGi zO^`7dGcj~i6y@VLAdwMGr|9nDE8n||XgQAE4Kptji6eT8k#m>+-zy(ZIGcgswx(eT z5|s94GvD*~&M(ikkm#*IcB=8??m0wMD$gHN-Lrok`<_4iUH0tX{KaJ4Gk@|&HRZh$ z(U64KX=3j*R^Dr0-6Y;?e@L#q*S?O;Zz1W}d;Q;`hrrxB8# z1DE1A&!4JLMA-We8|tmaj{md$oxP)dKaRc*jb-bZ?HLdp8ubuthoF3y&ag8 zSNdy_-RKX8hex$5{ePJr9v=Pr@-cVvzw*456*xzoChorrI4jP1c;403)sU!(J#lPr z;I@};Qa+L-WFn2a+0BYMq4EnAenyoFcTibXu%zyG@2jgT&pStiUy4s9)43xNiBQ=G z`{&XC^=YT6s>_y`C#H~Gz;&~SWsQXYk6>bG)9&2|6Tp8J1 z1q~78wav$QF}VNZz&2`S1wL@Ph>4U}*>I$wJk!~vT7s7ZE-IIgy9*-~4--BU%F3;t;JeU9F<$nSI0RR7SBCApWY5)MS CJaEGR literal 0 HcmV?d00001 diff --git a/charts/ai-stack/values.yaml b/charts/ai-stack/values.yaml index 2e9d93b..fdcad0c 100644 --- a/charts/ai-stack/values.yaml +++ b/charts/ai-stack/values.yaml @@ -185,3 +185,49 @@ reranker: - name: hf-cache persistentVolumeClaim: claimName: hf-cache + + +# Values for vllm: the vllm chart +# Reference: https://artifacthub.io/packages/helm/infracloud-charts/vllm?modal=values +vllm: + enabled: false + + config: + model: "meta-llama/Meta-Llama-3.1-8B-Instruct" + + env: + - name: HF_API_TOKEN + valueFrom: + secretKeyRef: + name: hf-api-token + key: HF_API_TOKEN + - name: HF_HUB_OFFLINE + value: "1" + - name: HF_HUB_CACHE + value: "/model" + + resources: + limits: + nvidia.com/gpu: 1 + requests: + nvidia.com/gpu: 1 + + strategy: + type: Recreate + + service: + type: LoadBalancer + port: 8000 + + volumeMounts: + - name: hf-cache + mountPath: /model + + volumes: + - name: hf-cache + persistentVolumeClaim: + claimName: hf-cache + - name: shm + emptyDir: + medium: Memory + sizeLimit: "1Gi"