Created a profile called rhoai_inference_performance_blog_setup for…

… testing the "granite-3b-code-instruct" LLM model using local-ci. Fixed `KeyErrors` by using the right variable hierarchy. Do not throw exceptions but instead return the control back to the source. Enabled tests visualization. Variable `export_artifacts.enabled: true` appears to cause a few insignificant issues at the test_ci and plot stages. Revert "Do not throw exceptions but instead return the control back to the source": this reverts commit 9bc33c2. Using a small model, phi-2, for deploying on a single T4 tensor core GPU. Fixed the `ValueError: Bare-metal cluster not recognized` by adding a new cluster metal-profile. Updating the KServe deployment mode from Raw to Serverless. Stick to the rawDeployment mode. Execute llm-load-test for 10 minutes. Deploy KServe in serverless mode. Revert "Deploy KServe in serverless mode": this reverts commit 1204060. Test mixtral-8x7b using the 2.12 rc2 build. Use the `phi-2` model for the testing efforts.
openshift-psap · Aug 26, 2024 · a408bc9 · a408bc9
1 parent 656e220
commit a408bc9
Showing 1 changed file with 23 additions and 0 deletions.
diff --git a/projects/kserve/testing/config.yaml b/projects/kserve/testing/config.yaml
@@ -35,6 +35,9 @@ ci_presets:
   cluster_a30:
     clusters.sutest.compute.machineset.type: "PSAP A30 node"
 
+  cluster_t4:
+    clusters.sutest.compute.machineset.type: "PSAP T4 node"
+
   cluster_2xa100:
     clusters.sutest.compute.machineset.type: "Beaker A100-80GB"
 
@@ -282,6 +285,25 @@ ci_presets:
     tests.e2e.llm_load_test.args.concurrency: [1, 2, 4, 8, 16, 32, 64, 96, 128]
     export_artifacts.enabled: true
 
+  ##### PSAP 1474 Setup
+  rhoai_inference_performance_blog_setup:
+    extends: [vllm, e2e_perf]
+    tests.e2e.models:
+    - name: phi-2
+      testing:
+        size: small  # Determines the dataset size
+        max_concurrency: 128  # Presumably an upper bound on the test concurrency -- TODO confirm
+    tests.e2e.matbenchmark.enabled: true
+    tests.e2e.llm_load_test.enabled: true
+    tests.e2e.llm_load_test.args.duration: 60  # NOTE(review): unit is not stated here -- confirm
+    tests.e2e.llm_load_test.args.concurrency: 1    # Also supports a list of values
+    tests.visualize: true
+    matbench.lts.opensearch.export.enabled: false
+    export_artifacts.enabled: false
+    export_artifacts.bucket: rhods-baremetal-results
+    export_artifacts.path_prefix: local-ci/rhods/kserve
+  ##### PSAP 1474 End of Setup
+
   # --
   # single-model vLLM
   # --
@@ -461,6 +483,7 @@ clusters:
     e26-h23-000-r650: cluster_icelake
     bb37-h13-000-r750.rdu3.labs.perfscale.redhat.com: cluster_a30
     nvd-srv-02.nvidia.eng.rdu2.redhat.com: cluster_2xa100
+    x37-h13-000-r740xd.rdu3.labs.perfscale.redhat.com: cluster_t4
   create:
     type: single # can be: single, ocp, managed
     keep: false