diff --git a/README.md b/README.md index 17dc454..8f7cebb 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,7 @@ EXAMPLES $ sf agent create --name CustomerSupportAgent --spec ./config/agentSpec.json --target-org my-org ``` -_See code: [src/commands/agent/create.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.1/src/commands/agent/create.ts)_ +_See code: [src/commands/agent/create.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.2-dev.1/src/commands/agent/create.ts)_ ## `sf agent generate definition` @@ -136,7 +136,7 @@ EXAMPLES $ sf agent generate definition ``` -_See code: [src/commands/agent/generate/definition.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.1/src/commands/agent/generate/definition.ts)_ +_See code: [src/commands/agent/generate/definition.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.2-dev.1/src/commands/agent/generate/definition.ts)_ ## `sf agent generate spec` @@ -197,7 +197,7 @@ EXAMPLES $ sf agent generate spec --output-dir specs --target-org my-org ``` -_See code: [src/commands/agent/generate/spec.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.1/src/commands/agent/generate/spec.ts)_ +_See code: [src/commands/agent/generate/spec.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.2-dev.1/src/commands/agent/generate/spec.ts)_ ## `sf agent generate testset` @@ -220,7 +220,7 @@ EXAMPLES $ sf agent generate testset ``` -_See code: [src/commands/agent/generate/testset.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.1/src/commands/agent/generate/testset.ts)_ +_See code: [src/commands/agent/generate/testset.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.2-dev.1/src/commands/agent/generate/testset.ts)_ ## `sf agent preview` @@ -255,7 +255,7 @@ FLAG DESCRIPTIONS the API name of the agent? (TBD based on agents library) ``` -_See code: [src/commands/agent/preview.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.1/src/commands/agent/preview.ts)_ +_See code: [src/commands/agent/preview.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.2-dev.1/src/commands/agent/preview.ts)_ ## `sf agent test cancel` @@ -292,7 +292,7 @@ EXAMPLES $ sf agent test cancel --job-id 4KBfake0000003F4AQ --target-org my-org ``` -_See code: [src/commands/agent/test/cancel.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.1/src/commands/agent/test/cancel.ts)_ +_See code: [src/commands/agent/test/cancel.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.2-dev.1/src/commands/agent/test/cancel.ts)_ ## `sf agent test results` @@ -348,7 +348,7 @@ FLAG DESCRIPTIONS test results aren't written. ``` -_See code: [src/commands/agent/test/results.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.1/src/commands/agent/test/results.ts)_ +_See code: [src/commands/agent/test/results.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.2-dev.1/src/commands/agent/test/results.ts)_ ## `sf agent test resume` @@ -411,7 +411,7 @@ FLAG DESCRIPTIONS test results aren't written. ``` -_See code: [src/commands/agent/test/resume.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.1/src/commands/agent/test/resume.ts)_ +_See code: [src/commands/agent/test/resume.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.2-dev.1/src/commands/agent/test/resume.ts)_ ## `sf agent test run` @@ -474,6 +474,6 @@ FLAG DESCRIPTIONS test results aren't written. ``` -_See code: [src/commands/agent/test/run.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.1/src/commands/agent/test/run.ts)_ +_See code: [src/commands/agent/test/run.ts](https://github.com/salesforcecli/plugin-agent/blob/1.7.2-dev.1/src/commands/agent/test/run.ts)_ diff --git a/package.json b/package.json index bfe9dff..7ce5006 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "@salesforce/plugin-agent", "description": "Commands to interact with Salesforce agents", - "version": "1.7.1", + "version": "1.7.2-dev.1", "author": "Salesforce", "bugs": "https://github.com/forcedotcom/cli/issues", "dependencies": { @@ -11,7 +11,7 @@ "@inquirer/select": "^4.0.1", "@oclif/core": "^4", "@oclif/multi-stage-output": "^0.7.12", - "@salesforce/agents": "^0.5.1", + "@salesforce/agents": "^0.5.2", "@salesforce/core": "^8.8.0", "@salesforce/kit": "^3.2.1", "@salesforce/sf-plugins-core": "^12.1.0", diff --git a/schemas/agent-test-results.json b/schemas/agent-test-results.json index ae4e91f..4b0d329 100644 --- a/schemas/agent-test-results.json +++ b/schemas/agent-test-results.json @@ -56,6 +56,9 @@ "number": { "type": "string" }, + "utterance": { + "type": "string" + }, "startTime": { "type": "string" }, @@ -151,7 +154,7 @@ } } }, - "required": ["status", "number", "startTime", "generatedData", "expectationResults"], + "required": ["status", "number", "utterance", "startTime", "generatedData", "expectationResults"], "additionalProperties": false } } diff --git a/src/commands/agent/generate/testset.ts b/src/commands/agent/generate/testset.ts index 905f6dc..7a80a21 100644 --- a/src/commands/agent/generate/testset.ts +++ b/src/commands/agent/generate/testset.ts @@ -9,19 +9,17 @@ import { mkdir, writeFile } from 'node:fs/promises'; import { SfCommand } from '@salesforce/sf-plugins-core'; import { Messages } from '@salesforce/core'; import input from '@inquirer/input'; -import select from '@inquirer/select'; import confirm from '@inquirer/confirm'; import { theme } from '../../../inquirer-theme.js'; Messages.importMessagesDirectoryFromMetaUrl(import.meta.url); const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.generate.testset'); -type ExpectationType = 'topic_sequence_match' | 'action_sequence_match' | 'bot_response_rating'; - export type TestSetInputs = { utterance: string; - expectationType: ExpectationType; - expectedValue: string; + actionSequenceExpectedValue: string; + botRatingExpectedValue: string; + topicSequenceExpectedValue: string; }; async function promptForTestCase(): Promise { @@ -31,21 +29,33 @@ async function promptForTestCase(): Promise { theme, }); - const expectationType = await select({ - message: 'What type of expectation would you like to test for the utterance?', - choices: ['topic_sequence_match', 'action_sequence_match', 'bot_response_rating'], + const topicSequenceExpectedValue = await input({ + message: 'What is the expected value for the topic expectation?', + validate: (d: string): boolean | string => { + if (!d.length) { + return 'expected value cannot be empty'; + } + return true; + }, theme, }); - const expectedValue = await input({ - message: 'What is the expected value for the expectation?', + const actionSequenceExpectedValue = await input({ + message: 'What is the expected value for the action expectation?', validate: (d: string): boolean | string => { if (!d.length) { return 'expected value cannot be empty'; } + return true; + }, + theme, + }); - if (expectationType === 'action_sequence_match') { - return d.split(',').length > 1 || 'expected value must be a comma-separated list of actions'; + const botRatingExpectedValue = await input({ + message: 'What is the expected value for the bot rating expectation?', + validate: (d: string): boolean | string => { + if (!d.length) { + return 'expected value cannot be empty'; } return true; @@ -55,8 +65,9 @@ async function promptForTestCase(): Promise { return { utterance, - expectationType, - expectedValue, + actionSequenceExpectedValue, + botRatingExpectedValue, + topicSequenceExpectedValue, }; } @@ -64,13 +75,6 @@ export function constructTestSetXML(testCases: TestSetInputs[]): string { const tab = ' '; let xml = `\n\n${tab}AGENT\n`; testCases.forEach((testCase, i) => { - const expectedValue = - testCase.expectationType === 'action_sequence_match' - ? `[${testCase.expectedValue - .split(',') - .map((v) => `"${v}"`) - .join(',')}]` - : testCase.expectedValue; xml += ` ${i + 1} @@ -78,8 +82,19 @@ export function constructTestSetXML(testCases: TestSetInputs[]): string { - ${testCase.expectationType} - ${expectedValue} + topic_sequence_match + ${testCase.topicSequenceExpectedValue} + + + action_sequence_match + ${`[${testCase.actionSequenceExpectedValue + .split(',') + .map((v) => `"${v}"`) + .join(',')}]`} + + + bot_response_rating + ${testCase.botRatingExpectedValue} \n`; diff --git a/src/testStages.ts b/src/testStages.ts index 8c0f9ec..98e7d87 100644 --- a/src/testStages.ts +++ b/src/testStages.ts @@ -43,7 +43,7 @@ export class TestStages { { stage: 'Polling for Test Results', type: 'dynamic-key-value', - label: 'Completed Tests', + label: 'Completed Test Cases', get: (data): string | undefined => data?.totalTestCases && data?.passingTestCases && data?.failingTestCases ? `${data?.passingTestCases + data?.failingTestCases}/${data?.totalTestCases}` @@ -52,13 +52,13 @@ export class TestStages { { stage: 'Polling for Test Results', type: 'dynamic-key-value', - label: 'Passing Tests', + label: 'Passing Test Cases', get: (data): string | undefined => data?.passingTestCases?.toString(), }, { stage: 'Polling for Test Results', type: 'dynamic-key-value', - label: 'Failing Tests', + label: 'Failing Test Cases', get: (data): string | undefined => data?.failingTestCases?.toString(), }, ], diff --git a/test/commands/agent/generate/testset.test.ts b/test/commands/agent/generate/testset.test.ts index 7296a91..10d8eb4 100644 --- a/test/commands/agent/generate/testset.test.ts +++ b/test/commands/agent/generate/testset.test.ts @@ -12,18 +12,21 @@ describe('constructTestSetXML', () => { const testCases = [ { utterance: 'hello', - expectationType: 'topic_sequence_match', - expectedValue: 'greeting', + actionSequenceExpectedValue: 'foo,bar', + botRatingExpectedValue: 'baz', + topicSequenceExpectedValue: 'qux', }, { utterance: 'goodbye', - expectationType: 'action_sequence_match', - expectedValue: 'farewell,seeya', + actionSequenceExpectedValue: 'foo,bar', + botRatingExpectedValue: 'baz', + topicSequenceExpectedValue: 'qux', }, { utterance: 'how are you', - expectationType: 'bot_response_rating', - expectedValue: '.5', + actionSequenceExpectedValue: 'foo,bar', + botRatingExpectedValue: 'baz', + topicSequenceExpectedValue: 'qux', }, ] satisfies TestSetInputs[]; @@ -40,7 +43,15 @@ describe('constructTestSetXML', () => { topic_sequence_match - greeting + qux + + + action_sequence_match + ["foo","bar"] + + + bot_response_rating + baz @@ -50,9 +61,17 @@ describe('constructTestSetXML', () => { goodbye + + topic_sequence_match + qux + action_sequence_match - ["farewell","seeya"] + ["foo","bar"] + + + bot_response_rating + baz @@ -62,9 +81,17 @@ describe('constructTestSetXML', () => { how are you + + topic_sequence_match + qux + + + action_sequence_match + ["foo","bar"] + bot_response_rating - .5 + baz diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json index c154dbd..b84e3ed 100644 --- a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json +++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json @@ -1,7 +1,7 @@ { "status": "COMPLETED", "startTime": "2024-11-28T12:00:00Z", - "endTime": "2024-11-28T12:05:00Z", + "endTime": "2024-11-28T12:00:48.56Z", "errorMessage": null, "subjectName": "Copilot_for_Salesforce", "testSet": { @@ -10,6 +10,7 @@ { "status": "COMPLETED", "number": 1, + "utterance": "Summarize account Acme", "startTime": "2024-11-28T12:00:10Z", "endTime": "2024-11-28T12:00:20Z", "generatedData": { @@ -23,8 +24,8 @@ "expectationResults": [ { "name": "topic_sequence_match", - "actualValue": "Result A", - "expectedValue": "Result A", + "actualValue": "GeneralCRM", + "expectedValue": "GeneralCRM", "score": 1.0, "result": "Passed", "metricLabel": "Accuracy", @@ -37,8 +38,22 @@ }, { "name": "action_sequence_match", - "actualValue": "Result B", - "expectedValue": "Result B", + "actualValue": "[\"IdentifyRecordByName\",\"SummarizeRecord\"]", + "expectedValue": "[\"IdentifyRecordByName\",\"SummarizeRecord\"]", + "score": 1.0, + "result": "Passed", + "metricLabel": "Precision", + "metricExplainability": "Measures the precision of the result.", + "status": "Completed", + "startTime": "2024-11-28T12:00:14Z", + "endTime": "2024-11-28T12:00:15Z", + "errorCode": null, + "errorMessage": null + }, + { + "name": "bot_response_rating", + "actualValue": "Here is the summary of the account Acme. How else can I assist you? Acme is a customer since 2019. They have 3 open opportunities and 2 open cases.", + "expectedValue": "Summary of account details are shown", "score": 0.9, "result": "Passed", "metricLabel": "Precision", @@ -55,6 +70,7 @@ "status": "ERROR", "number": 2, "startTime": "2024-11-28T12:00:30Z", + "utterance": "Summarize the open cases and Activities of acme from sep to nov 2024", "endTime": "2024-11-28T12:00:40Z", "generatedData": { "type": "AGENT", @@ -67,31 +83,45 @@ "expectationResults": [ { "name": "topic_sequence_match", - "actualValue": "Result C", - "expectedValue": "Result D", - "score": 0.5, - "result": "Failed", + "actualValue": "GeneralCRM", + "expectedValue": "GeneralCRM", + "score": 1, + "result": "Passed", "metricLabel": "Accuracy", "metricExplainability": "Measures the correctness of the result.", "status": "Completed", "startTime": "2024-11-28T12:00:32Z", "endTime": "2024-11-28T12:00:33Z", "errorCode": null, - "errorMessage": "Expected \"Result D\" but got \"Result C\"." + "errorMessage": null }, { - "name": "topic_sequence_match", - "actualValue": "Result C", - "expectedValue": "Result D", + "name": "action_sequence_match", + "actualValue": "[\"IdentifyRecordByName\",\"QueryRecords\"]", + "expectedValue": "[\"IdentifyRecordByName\",\"QueryRecords\",\"GetActivitiesTimeline\"]", "score": 0.5, "result": "Failed", - "metricLabel": "Accuracy", - "metricExplainability": "Measures the correctness of the result.", + "metricLabel": "Precision", + "metricExplainability": "Measures the precision of the result.", "status": "Completed", - "startTime": "2024-11-28T12:00:32Z", - "endTime": "2024-11-28T12:00:33Z", - "errorCode": null, - "errorMessage": "Expected \"Result D\" but got \"Result C\"." + "startTime": "2024-11-28T12:00:14Z", + "endTime": "2024-11-28T12:00:15Z", + "errorCode": 1, + "errorMessage": "Actual response does not match the expected response" + }, + { + "name": "bot_response_rating", + "actualValue": "It looks like I am unable to find the information you are looking for due to access restrictions. How else can I assist you?", + "expectedValue": "Summary of open cases and activities associated with timeline", + "score": 0.1, + "result": "Failed", + "metricLabel": "Precision", + "metricExplainability": "Measures the precision of the result.", + "status": "Completed", + "startTime": "2024-11-28T12:00:14Z", + "endTime": "2024-11-28T12:00:15Z", + "errorCode": 1, + "errorMessage": "Actual response does not match the expected response" } ] } diff --git a/yarn.lock b/yarn.lock index 765b31e..6e11caa 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1422,16 +1422,17 @@ resolved "https://registry.yarnpkg.com/@pkgjs/parseargs/-/parseargs-0.11.0.tgz#a77ea742fab25775145434eb1d2328cf5013ac33" integrity sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg== -"@salesforce/agents@^0.5.1": - version "0.5.1" - resolved "https://registry.yarnpkg.com/@salesforce/agents/-/agents-0.5.1.tgz#b6de16004505432c226c02f612c6b0b7b6227f6f" - integrity sha512-FGpCQ3PVzZunoaQVPAJG05eqafOvf2P7fx2w5aZYVg9yqwM/UnBpTBKVvkmdZDsBRTUYaExr6tvboaMc5Hsfzw== +"@salesforce/agents@^0.5.2": + version "0.5.2" + resolved "https://registry.yarnpkg.com/@salesforce/agents/-/agents-0.5.2.tgz#b60e7227a78e3c4431565ba6e3862aa97c65306f" + integrity sha512-Khr24nZlV875PPwrmFjfDBSMi7Hqfrb6+Y+d8I4OHMU7iP+U9lnvRAwczD9o+WB5gq6WSCkhgQY89QmAA55dKw== dependencies: "@oclif/table" "^0.3.5" "@salesforce/core" "^8.8.0" "@salesforce/kit" "^3.2.3" "@salesforce/sf-plugins-core" "^12.1.0" "@salesforce/source-deploy-retrieve" "^12.10.3" + ansis "^3.4.0" fast-xml-parser "^4" nock "^13.5.6" @@ -2631,6 +2632,11 @@ ansis@^3.3.1, ansis@^3.3.2: resolved "https://registry.yarnpkg.com/ansis/-/ansis-3.3.2.tgz#15adc36fea112da95c74d309706e593618accac3" integrity sha512-cFthbBlt+Oi0i9Pv/j6YdVWJh54CtjGACaMPCIrEV4Ha7HWsIjXDwseYV79TIL0B4+KfSwD5S70PeQDkPUd1rA== +ansis@^3.4.0: + version "3.4.0" + resolved "https://registry.yarnpkg.com/ansis/-/ansis-3.4.0.tgz#d49ebb72f228aa122733a585c600dcba4f5f0838" + integrity sha512-zVESKSQhWaPhGaWiKj1k+UqvpC7vPBBgG3hjQEeIx2YGzylWt8qA3ziAzRuUtm0OnaGsZKjIvfl8D/sJTt/I0w== + anymatch@~3.1.2: version "3.1.2" resolved "https://registry.yarnpkg.com/anymatch/-/anymatch-3.1.2.tgz#c0557c096af32f106198f4f4e2a383537e378716"