diff --git a/package.json b/package.json index 88fef14..7ce5006 100644 --- a/package.json +++ b/package.json @@ -11,7 +11,7 @@ "@inquirer/select": "^4.0.1", "@oclif/core": "^4", "@oclif/multi-stage-output": "^0.7.12", - "@salesforce/agents": "^0.5.1", + "@salesforce/agents": "^0.5.2", "@salesforce/core": "^8.8.0", "@salesforce/kit": "^3.2.1", "@salesforce/sf-plugins-core": "^12.1.0", diff --git a/schemas/agent-test-results.json b/schemas/agent-test-results.json index ae4e91f..4b0d329 100644 --- a/schemas/agent-test-results.json +++ b/schemas/agent-test-results.json @@ -56,6 +56,9 @@ "number": { "type": "string" }, + "utterance": { + "type": "string" + }, "startTime": { "type": "string" }, @@ -151,7 +154,7 @@ } } }, - "required": ["status", "number", "startTime", "generatedData", "expectationResults"], + "required": ["status", "number", "utterance", "startTime", "generatedData", "expectationResults"], "additionalProperties": false } } diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json index c154dbd..b84e3ed 100644 --- a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json +++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json @@ -1,7 +1,7 @@ { "status": "COMPLETED", "startTime": "2024-11-28T12:00:00Z", - "endTime": "2024-11-28T12:05:00Z", + "endTime": "2024-11-28T12:00:48.56Z", "errorMessage": null, "subjectName": "Copilot_for_Salesforce", "testSet": { @@ -10,6 +10,7 @@ { "status": "COMPLETED", "number": 1, + "utterance": "Summarize account Acme", "startTime": "2024-11-28T12:00:10Z", "endTime": "2024-11-28T12:00:20Z", "generatedData": { @@ -23,8 +24,8 @@ "expectationResults": [ { "name": "topic_sequence_match", - "actualValue": "Result A", - "expectedValue": "Result A", + "actualValue": "GeneralCRM", + "expectedValue": "GeneralCRM", "score": 1.0, "result": "Passed", "metricLabel": "Accuracy", @@ -37,8 +38,22 @@ }, { "name": "action_sequence_match", - "actualValue": "Result B", - "expectedValue": "Result B", + "actualValue": "[\"IdentifyRecordByName\",\"SummarizeRecord\"]", + "expectedValue": "[\"IdentifyRecordByName\",\"SummarizeRecord\"]", + "score": 1.0, + "result": "Passed", + "metricLabel": "Precision", + "metricExplainability": "Measures the precision of the result.", + "status": "Completed", + "startTime": "2024-11-28T12:00:14Z", + "endTime": "2024-11-28T12:00:15Z", + "errorCode": null, + "errorMessage": null + }, + { + "name": "bot_response_rating", + "actualValue": "Here is the summary of the account Acme. How else can I assist you? Acme is a customer since 2019. They have 3 open opportunities and 2 open cases.", + "expectedValue": "Summary of account details are shown", "score": 0.9, "result": "Passed", "metricLabel": "Precision", @@ -55,6 +70,7 @@ "status": "ERROR", "number": 2, "startTime": "2024-11-28T12:00:30Z", + "utterance": "Summarize the open cases and Activities of acme from sep to nov 2024", "endTime": "2024-11-28T12:00:40Z", "generatedData": { "type": "AGENT", @@ -67,31 +83,45 @@ "expectationResults": [ { "name": "topic_sequence_match", - "actualValue": "Result C", - "expectedValue": "Result D", - "score": 0.5, - "result": "Failed", + "actualValue": "GeneralCRM", + "expectedValue": "GeneralCRM", + "score": 1, + "result": "Passed", "metricLabel": "Accuracy", "metricExplainability": "Measures the correctness of the result.", "status": "Completed", "startTime": "2024-11-28T12:00:32Z", "endTime": "2024-11-28T12:00:33Z", "errorCode": null, - "errorMessage": "Expected \"Result D\" but got \"Result C\"." + "errorMessage": null }, { - "name": "topic_sequence_match", - "actualValue": "Result C", - "expectedValue": "Result D", + "name": "action_sequence_match", + "actualValue": "[\"IdentifyRecordByName\",\"QueryRecords\"]", + "expectedValue": "[\"IdentifyRecordByName\",\"QueryRecords\",\"GetActivitiesTimeline\"]", "score": 0.5, "result": "Failed", - "metricLabel": "Accuracy", - "metricExplainability": "Measures the correctness of the result.", + "metricLabel": "Precision", + "metricExplainability": "Measures the precision of the result.", "status": "Completed", - "startTime": "2024-11-28T12:00:32Z", - "endTime": "2024-11-28T12:00:33Z", - "errorCode": null, - "errorMessage": "Expected \"Result D\" but got \"Result C\"." + "startTime": "2024-11-28T12:00:14Z", + "endTime": "2024-11-28T12:00:15Z", + "errorCode": 1, + "errorMessage": "Actual response does not match the expected response" + }, + { + "name": "bot_response_rating", + "actualValue": "It looks like I am unable to find the information you are looking for due to access restrictions. How else can I assist you?", + "expectedValue": "Summary of open cases and activities associated with timeline", + "score": 0.1, + "result": "Failed", + "metricLabel": "Precision", + "metricExplainability": "Measures the precision of the result.", + "status": "Completed", + "startTime": "2024-11-28T12:00:14Z", + "endTime": "2024-11-28T12:00:15Z", + "errorCode": 1, + "errorMessage": "Actual response does not match the expected response" } ] } diff --git a/yarn.lock b/yarn.lock index 765b31e..6e11caa 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1422,16 +1422,17 @@ resolved "https://registry.yarnpkg.com/@pkgjs/parseargs/-/parseargs-0.11.0.tgz#a77ea742fab25775145434eb1d2328cf5013ac33" integrity sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg== -"@salesforce/agents@^0.5.1": - version "0.5.1" - resolved "https://registry.yarnpkg.com/@salesforce/agents/-/agents-0.5.1.tgz#b6de16004505432c226c02f612c6b0b7b6227f6f" - integrity sha512-FGpCQ3PVzZunoaQVPAJG05eqafOvf2P7fx2w5aZYVg9yqwM/UnBpTBKVvkmdZDsBRTUYaExr6tvboaMc5Hsfzw== +"@salesforce/agents@^0.5.2": + version "0.5.2" + resolved "https://registry.yarnpkg.com/@salesforce/agents/-/agents-0.5.2.tgz#b60e7227a78e3c4431565ba6e3862aa97c65306f" + integrity sha512-Khr24nZlV875PPwrmFjfDBSMi7Hqfrb6+Y+d8I4OHMU7iP+U9lnvRAwczD9o+WB5gq6WSCkhgQY89QmAA55dKw== dependencies: "@oclif/table" "^0.3.5" "@salesforce/core" "^8.8.0" "@salesforce/kit" "^3.2.3" "@salesforce/sf-plugins-core" "^12.1.0" "@salesforce/source-deploy-retrieve" "^12.10.3" + ansis "^3.4.0" fast-xml-parser "^4" nock "^13.5.6" @@ -2631,6 +2632,11 @@ ansis@^3.3.1, ansis@^3.3.2: resolved "https://registry.yarnpkg.com/ansis/-/ansis-3.3.2.tgz#15adc36fea112da95c74d309706e593618accac3" integrity sha512-cFthbBlt+Oi0i9Pv/j6YdVWJh54CtjGACaMPCIrEV4Ha7HWsIjXDwseYV79TIL0B4+KfSwD5S70PeQDkPUd1rA== +ansis@^3.4.0: + version "3.4.0" + resolved "https://registry.yarnpkg.com/ansis/-/ansis-3.4.0.tgz#d49ebb72f228aa122733a585c600dcba4f5f0838" + integrity sha512-zVESKSQhWaPhGaWiKj1k+UqvpC7vPBBgG3hjQEeIx2YGzylWt8qA3ziAzRuUtm0OnaGsZKjIvfl8D/sJTt/I0w== + anymatch@~3.1.2: version "3.1.2" resolved "https://registry.yarnpkg.com/anymatch/-/anymatch-3.1.2.tgz#c0557c096af32f106198f4f4e2a383537e378716"