Add watsonx basic deployment tests for additional models (red-hat-data-services#974)

* add test with flan grammar model (1)

Signed-off-by: bdattoma <[email protected]>

* add streamed expected response + fix regex

Signed-off-by: bdattoma <[email protected]>

* add flan-t5-large deployment and query

Signed-off-by: bdattoma <[email protected]>

* update tags

Signed-off-by: bdattoma <[email protected]>

* fix robocop alerts

Signed-off-by: bdattoma <[email protected]>

---------

Signed-off-by: bdattoma <[email protected]>
bdattoma authored Oct 17, 2023
1 parent 7c00cbf commit 579571a
Showing 2 changed files with 73 additions and 5 deletions.
15 changes: 15 additions & 0 deletions ods_ci/tests/Resources/Files/llm/model_expected_responses.json
@@ -11,9 +11,24 @@
"bloom-560m-caikit": {
"generatedTokenCount": 20,
"response_text": "The temperature of water boiling depends on the temperature of the water. The boiling point of water is the"
},
"flan-t5-large": {
"generatedTokenCount": 7,
"response_text": "212 ° f",
"streamed_response_text": "{ 'details': { 'input_token_count': '8' }}{ 'tokens': [ { 'text': '▁', 'logprob': -1.2747352123260498 } ], 'details': { 'generated_tokens': 1 }}{ 'generated_text': '212', 'tokens': [ { 'text': '212', 'logprob': -1.0382558107376099 } ], 'details': { 'generated_tokens': 2 }}{ 'generated_text': ' ', 'tokens': [ { 'text': '▁', 'logprob': -0.8835393786430359 } ], 'details': { 'generated_tokens': 3 }}{ 'generated_text': '°', 'tokens': [ { 'text': '°', 'logprob': -0.6830151677131653 } ], 'details': { 'generated_tokens': 4 }}{ 'generated_text': ' ', 'tokens': [ { 'text': '▁', 'logprob': -0.8991543650627136 } ], 'details': { 'generated_tokens': 5 }}{ 'generated_text': 'f', 'tokens': [ { 'text': 'f', 'logprob': -0.44153422117233276 } ], 'details': { 'generated_tokens': 6 }}{ 'tokens': [ { 'text': '\u003c/s\u003e', 'logprob': -0.015602776780724525 } ], 'details': { 'finish_reason': 'EOS_TOKEN', 'generated_tokens': 7 }}"
}
}

},
{
"query_text": "This are an very wrong phraxe",
"models": {
"flan-t5-large-grammar-synthesis-caikit": {
"generatedTokenCount": 9,
"response_text": "This is a very wrong phrase.",
"streamed_response_text": "{'details':{'input_token_count':'10'}}{'generated_text':'This','tokens':[{'text':'▁This','logprob':<logprob_removed>}],'details':{'generated_tokens':1}}{'generated_text':'is','tokens':[{'text':'▁is','logprob':<logprob_removed>}],'details':{'generated_tokens':2}}{'generated_text':'','tokens':[{'text':'▁','logprob':<logprob_removed>}],'details':{'generated_tokens':3}}{'generated_text':'a','tokens':[{'text':'a','logprob':<logprob_removed>}],'details':{'generated_tokens':4}}{'generated_text':'very','tokens':[{'text':'▁very','logprob':<logprob_removed>}],'details':{'generated_tokens':5}}{'generated_text':'wrong','tokens':[{'text':'▁wrong','logprob':<logprob_removed>}],'details':{'generated_tokens':6}}{'generated_text':'phrase','tokens':[{'text':'▁phrase','logprob':<logprob_removed>}],'details':{'generated_tokens':7}}{'generated_text':'.','tokens':[{'text':'.','logprob':<logprob_removed>}],'details':{'generated_tokens':8}}{'tokens':[{'text':'</s>','logprob':<logprob_removed>}],'details':{'finish_reason':'EOS_TOKEN','generated_tokens':9}}"
}
}
}
]
}
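Note on how this fixture is consumed: the suite loads the JSON into ${EXP_RESPONSES} and indexes it as [queries][<query_idx>][models][<model_name>], which is why the new grammar entry is addressed with query_idx=1 in the tests below. A minimal, self-contained sketch of that lookup; the local file path and loading keywords are illustrative assumptions, not the suite's actual setup:

*** Settings ***
Library    OperatingSystem

*** Test Cases ***
Look Up Expected Response For The Grammar Query
    # Assumes a local copy of ods_ci/tests/Resources/Files/llm/model_expected_responses.json
    ${json_text}=    Get File    model_expected_responses.json
    ${EXP_RESPONSES}=    Evaluate    json.loads($json_text)
    # Index 1 is the grammar-correction query added by this commit
    ${expected}=    Set Variable    ${EXP_RESPONSES}[queries][1][models][flan-t5-large-grammar-synthesis-caikit]
    Should Be Equal    ${expected}[response_text]    This is a very wrong phrase.
    Should Be Equal As Integers    ${expected}[generatedTokenCount]    9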
63 changes: 58 additions & 5 deletions
@@ -42,8 +42,12 @@ ${SKIP_PREREQS_INSTALL}= ${FALSE}
${SCRIPT_BASED_INSTALL}= ${FALSE}
${MODELS_BUCKET}= ${S3.BUCKET_3}
${FLAN_MODEL_S3_DIR}= flan-t5-small
${FLAN_GRAMMAR_MODEL_S3_DIR}= flan-t5-large-grammar-synthesis-caikit
${FLAN_LARGE_MODEL_S3_DIR}= flan-t5-large
${BLOOM_MODEL_S3_DIR}= bloom-560m
${FLAN_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${FLAN_MODEL_S3_DIR}/
${FLAN_GRAMMAR_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${FLAN_GRAMMAR_MODEL_S3_DIR}/
${FLAN_LARGE_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${FLAN_LARGE_MODEL_S3_DIR}/
${BLOOM_STORAGE_URI}= s3://${S3.BUCKET_3.NAME}/${BLOOM_MODEL_S3_DIR}/
${CAIKIT_ALLTOKENS_ENDPOINT}= caikit.runtime.Nlp.NlpService/TextGenerationTaskPredict
${CAIKIT_STREAM_ENDPOINT}= caikit.runtime.Nlp.NlpService/ServerStreamingTextGenerationTaskPredict
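The two CAIKIT_* endpoint variables above are gRPC method names on the Caikit NLP runtime, and the suite invokes them through the Query Model With GRPCURL keyword. As a rough standalone illustration of the same call (assumes grpcurl is installed; the host and query text are placeholders):

*** Settings ***
Library    Process

*** Test Cases ***
Query The All-Tokens Endpoint With Grpcurl
    # Placeholder host; the suite resolves the real one via Get KServe Inference Host Via CLI
    ${result}=    Run Process    grpcurl    -insecure
    ...    -d    {"text": "At what temperature does water boil?"}
    ...    -H    mm-model-id: flan-t5-large
    ...    flan-t5-large.example.cluster:443
    ...    caikit.runtime.Nlp.NlpService/TextGenerationTaskPredict
    Should Be Equal As Integers    ${result.rc}    0
    Log    ${result.stdout}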
@@ -379,6 +383,53 @@ Verify Non Admin Can Serve And Query A Model
[Teardown] Run Keywords Login To OCP Using API ${OCP_ADMIN_USER.USERNAME} ${OCP_ADMIN_USER.PASSWORD} AND
... Clean Up Test Project test_ns=${test_namespace} isvc_names=${models_names}

Verify User Can Serve And Query Flan-t5 Grammar Syntax Corrector
[Documentation] Deploys and queries flan-t5-large-grammar-synthesis model
[Tags] ODS-2441 WatsonX
[Setup] Set Project And Runtime namespace=grammar-model
${test_namespace}= Set Variable grammar-model
${flan_model_name}= Set Variable flan-t5-large-grammar-synthesis-caikit
${models_names}= Create List ${flan_model_name}
Compile Inference Service YAML isvc_name=${flan_model_name}
... sa_name=${DEFAULT_BUCKET_SA_NAME}
... model_storage_uri=${FLAN_GRAMMAR_STORAGE_URI}
Deploy Model Via CLI isvc_filepath=${LLM_RESOURCES_DIRPATH}/caikit_isvc_filled.yaml
... namespace=${test_namespace}
Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name}
... namespace=${test_namespace}
Query Models And Check Responses Multiple Times models_names=${models_names}
... endpoint=${CAIKIT_ALLTOKENS_ENDPOINT} n_times=1
... namespace=${test_namespace} query_idx=1
Query Models And Check Responses Multiple Times models_names=${models_names}
... endpoint=${CAIKIT_STREAM_ENDPOINT} n_times=1 streamed_response=${TRUE}
... namespace=${test_namespace} query_idx=${1}
[Teardown] Clean Up Test Project test_ns=${test_namespace}
... isvc_names=${models_names}

Verify User Can Serve And Query Flan-t5 Large
[Documentation] Deploys and queries flan-t5-large model
[Tags] ODS-2434 WatsonX
[Setup] Set Project And Runtime namespace=flan-t5-large3
${test_namespace}= Set Variable flan-t5-large3
${flan_model_name}= Set Variable flan-t5-large
${models_names}= Create List ${flan_model_name}
Compile Inference Service YAML isvc_name=${flan_model_name}
... sa_name=${DEFAULT_BUCKET_SA_NAME}
... model_storage_uri=${FLAN_LARGE_STORAGE_URI}
Deploy Model Via CLI isvc_filepath=${LLM_RESOURCES_DIRPATH}/caikit_isvc_filled.yaml
... namespace=${test_namespace}
Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${flan_model_name}
... namespace=${test_namespace}
Query Models And Check Responses Multiple Times models_names=${models_names}
... endpoint=${CAIKIT_ALLTOKENS_ENDPOINT} n_times=1
... namespace=${test_namespace} query_idx=${0}
Query Models And Check Responses Multiple Times models_names=${models_names}
... endpoint=${CAIKIT_STREAM_ENDPOINT} n_times=1 streamed_response=${TRUE}
... namespace=${test_namespace} query_idx=${0}
[Teardown] Clean Up Test Project test_ns=${test_namespace}
... isvc_names=${models_names}


*** Keywords ***
Install Model Serving Stack Dependencies
[Documentation] Installing And Configuring dependency operators: Service Mesh and Serverless.
@@ -718,33 +769,35 @@ Model Response Should Match The Expectation
${cleaned_response_text}= Replace String Using Regexp ${model_response} \\s+ ${EMPTY}
${rc} ${cleaned_response_text}= Run And Return Rc And Output echo -e '${cleaned_response_text}'
${cleaned_response_text}= Replace String Using Regexp ${cleaned_response_text} " '
- ${cleaned_response_text}= Replace String Using Regexp ${cleaned_response_text} [-]?\\d.\\d+ <logprob_removed>
+ ${cleaned_response_text}= Replace String Using Regexp ${cleaned_response_text}
+ ... [-]?\\d.\\d+[e]?[-]?\\d+ <logprob_removed>
Log ${cleaned_response_text}
${cleaned_exp_response_text}= Replace String Using Regexp
... ${EXP_RESPONSES}[queries][${query_idx}][models][${model_name}][streamed_response_text]
- ... [-]?\\d.\\d+ <logprob_removed>
+ ... [-]?\\d.\\d+[e]?[-]?\\d+ <logprob_removed>
${cleaned_exp_response_text}= Replace String Using Regexp ${cleaned_exp_response_text} \\s+ ${EMPTY}
Should Be Equal ${cleaned_response_text} ${cleaned_exp_response_text}
END
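The widened pattern matters because logprobs can arrive in scientific notation; the old [-]?\\d.\\d+ would match only the -1.5 part of -1.5e-05 and leave the e-05 tail behind, so the comparison would fail. A minimal sketch of the normalization on a single streamed chunk, runnable on its own (the chunk value is illustrative):

*** Settings ***
Library    String

*** Test Cases ***
Scrub Logprob From A Streamed Chunk
    ${chunk}=    Set Variable    {'text': '▁This', 'logprob': -1.5e-05}
    # Optional sign, digits, optional exponent: same pattern as the keyword above
    ${scrubbed}=    Replace String Using Regexp    ${chunk}    [-]?\\d.\\d+[e]?[-]?\\d+    <logprob_removed>
    Should Be Equal    ${scrubbed}    {'text': '▁This', 'logprob': <logprob_removed>}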

Query Models And Check Responses Multiple Times
[Documentation] Queries and checks the responses of the given models in a loop
... running ${n_times} times. For each iteration it queries all the models in sequence
[Arguments] ${models_names} ${namespace} ${endpoint}=${CAIKIT_ALLTOKENS_ENDPOINT} ${n_times}=10
- ... ${streamed_response}=${FALSE}
+ ... ${streamed_response}=${FALSE} ${query_idx}=0
FOR ${counter} IN RANGE 0 ${n_times} 1
Log ${counter}
FOR ${index} ${model_name} IN ENUMERATE @{models_names}
Log ${index}: ${model_name}
${host}= Get KServe Inference Host Via CLI isvc_name=${model_name} namespace=${namespace}
- ${body}= Set Variable '{"text": "${EXP_RESPONSES}[queries][0][query_text]"}'
+ ${body}= Set Variable '{"text": "${EXP_RESPONSES}[queries][${query_idx}][query_text]"}'
${header}= Set Variable 'mm-model-id: ${model_name}'
${res}= Query Model With GRPCURL host=${host} port=443
... endpoint=${endpoint}
... json_body=${body} json_header=${header}
... insecure=${TRUE} skip_res_json=${streamed_response}
Run Keyword And Continue On Failure
... Model Response Should Match The Expectation model_response=${res} model_name=${model_name}
- ... query_idx=0 streamed_response=${streamed_response}
+ ... streamed_response=${streamed_response} query_idx=${query_idx}
END
END
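Because ${query_idx} defaults to 0, pre-existing callers of this keyword keep their behavior; only the new grammar test opts into the second fixture entry. Both call shapes, sketched with the variable names used in the tests above:

    # Pre-existing callers keep the first fixture query by default
    Query Models And Check Responses Multiple Times    models_names=${models_names}
    ...    namespace=${test_namespace}

    # The grammar test targets the query added by this commit
    Query Models And Check Responses Multiple Times    models_names=${models_names}
    ...    namespace=${test_namespace}    query_idx=1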
