diff --git a/package.json b/package.json index 68aff73..a63441a 100644 --- a/package.json +++ b/package.json @@ -11,7 +11,7 @@ "@inquirer/select": "^4.0.1", "@oclif/core": "^4", "@oclif/multi-stage-output": "^0.7.12", - "@salesforce/agents": "^0.4.2", + "@salesforce/agents": "^0.5.1", "@salesforce/core": "^8.8.0", "@salesforce/kit": "^3.2.1", "@salesforce/sf-plugins-core": "^12.1.0", diff --git a/schemas/agent-test-results.json b/schemas/agent-test-results.json index 6914fb8..ae4e91f 100644 --- a/schemas/agent-test-results.json +++ b/schemas/agent-test-results.json @@ -20,14 +20,27 @@ "errorMessage": { "type": "string" }, - "testCases": { - "type": "array", - "items": { - "$ref": "#/definitions/TestCaseResult" - } + "subjectName": { + "type": "string" + }, + "testSet": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "testCases": { + "type": "array", + "items": { + "$ref": "#/definitions/TestCaseResult" + } + } + }, + "required": ["name", "testCases"], + "additionalProperties": false } }, - "required": ["status", "startTime", "testCases"], + "required": ["status", "startTime", "subjectName", "testSet"], "additionalProperties": false }, "TestStatus": { diff --git a/src/flags.ts b/src/flags.ts index 0478cbf..1695172 100644 --- a/src/flags.ts +++ b/src/flags.ts @@ -11,12 +11,7 @@ Messages.importMessagesDirectoryFromMetaUrl(import.meta.url); const messages = Messages.loadMessages('@salesforce/plugin-agent', 'shared'); export const resultFormatFlag = Flags.option({ - options: [ - 'json', - 'human', - 'junit', - // 'tap', - ] as const, + options: ['json', 'human', 'junit', 'tap'] as const, default: 'human', summary: messages.getMessage('flags.result-format.summary'), }); diff --git a/src/handleTestResults.ts b/src/handleTestResults.ts index 0352916..1a6cceb 100644 --- a/src/handleTestResults.ts +++ b/src/handleTestResults.ts @@ -6,7 +6,7 @@ */ import { join } from 'node:path'; import { writeFile, mkdir } from 'node:fs/promises'; -import { AgentTestDetailsResponse, jsonFormat, humanFormat, junitFormat } from '@salesforce/agents'; +import { AgentTestDetailsResponse, jsonFormat, humanFormat, junitFormat, tapFormat } from '@salesforce/agents'; import { Ux } from '@salesforce/sf-plugins-core/Ux'; async function writeFileToDir(outputDir: string, fileName: string, content: string): Promise { @@ -24,7 +24,7 @@ export async function handleTestResults({ outputDir, }: { id: string; - format: 'human' | 'json' | 'junit'; + format: 'human' | 'json' | 'junit' | 'tap'; results: AgentTestDetailsResponse | undefined; jsonEnabled: boolean; outputDir?: string; @@ -59,4 +59,12 @@ export async function handleTestResults({ await writeFileToDir(outputDir, `test-result-${id}.xml`, formatted); } } + + if (format === 'tap') { + const formatted = await tapFormat(results); + ux.log(formatted); + if (outputDir) { + await writeFileToDir(outputDir, `test-result-${id}.txt`, formatted); + } + } } diff --git a/test/commands/agent/test/results.nut.ts b/test/commands/agent/test/results.nut.ts index 5004e91..e59eaf3 100644 --- a/test/commands/agent/test/results.nut.ts +++ b/test/commands/agent/test/results.nut.ts @@ -47,7 +47,7 @@ describe('agent test results NUTs', () => { ).jsonOutput; expect(output?.result.status).to.equal('COMPLETED'); - expect(output?.result.testCases.length).to.equal(2); + expect(output?.result.testSet.testCases.length).to.equal(2); // check that cache does not have an entry const cache = await AgentTestCache.create(); diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json index b895af0..c154dbd 100644 --- a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json +++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json @@ -3,80 +3,98 @@ "startTime": "2024-11-28T12:00:00Z", "endTime": "2024-11-28T12:05:00Z", "errorMessage": null, - "testCases": [ - { - "status": "COMPLETED", - "number": 1, - "startTime": "2024-11-28T12:00:10Z", - "endTime": "2024-11-28T12:00:20Z", - "generatedData": { - "type": "AGENT", - "actionsSequence": ["Action1", "Action2"], - "outcome": "Success", - "topic": "Mathematics", - "inputTokensCount": 50, - "outputTokensCount": 55 - }, - "expectationResults": [ - { - "name": "topic_sequence_match", - "actualValue": "Result A", - "expectedValue": "Result A", - "score": 1.0, - "result": "Passed", - "metricLabel": "Accuracy", - "metricExplainability": "Measures the correctness of the result.", - "status": "Completed", - "startTime": "2024-11-28T12:00:12Z", - "endTime": "2024-11-28T12:00:13Z", - "errorCode": null, - "errorMessage": null + "subjectName": "Copilot_for_Salesforce", + "testSet": { + "name": "CRM_Sanity_v1", + "testCases": [ + { + "status": "COMPLETED", + "number": 1, + "startTime": "2024-11-28T12:00:10Z", + "endTime": "2024-11-28T12:00:20Z", + "generatedData": { + "type": "AGENT", + "actionsSequence": ["Action1", "Action2"], + "outcome": "Success", + "topic": "Mathematics", + "inputTokensCount": 50, + "outputTokensCount": 55 }, - { - "name": "action_sequence_match", - "actualValue": "Result B", - "expectedValue": "Result B", - "score": 0.9, - "result": "Passed", - "metricLabel": "Precision", - "metricExplainability": "Measures the precision of the result.", - "status": "Completed", - "startTime": "2024-11-28T12:00:14Z", - "endTime": "2024-11-28T12:00:15Z", - "errorCode": null, - "errorMessage": null - } - ] - }, - { - "status": "ERROR", - "number": 2, - "startTime": "2024-11-28T12:00:30Z", - "endTime": "2024-11-28T12:00:40Z", - "generatedData": { - "type": "AGENT", - "actionsSequence": ["Action3", "Action4"], - "outcome": "Failure", - "topic": "Physics", - "inputTokensCount": 60, - "outputTokensCount": 50 + "expectationResults": [ + { + "name": "topic_sequence_match", + "actualValue": "Result A", + "expectedValue": "Result A", + "score": 1.0, + "result": "Passed", + "metricLabel": "Accuracy", + "metricExplainability": "Measures the correctness of the result.", + "status": "Completed", + "startTime": "2024-11-28T12:00:12Z", + "endTime": "2024-11-28T12:00:13Z", + "errorCode": null, + "errorMessage": null + }, + { + "name": "action_sequence_match", + "actualValue": "Result B", + "expectedValue": "Result B", + "score": 0.9, + "result": "Passed", + "metricLabel": "Precision", + "metricExplainability": "Measures the precision of the result.", + "status": "Completed", + "startTime": "2024-11-28T12:00:14Z", + "endTime": "2024-11-28T12:00:15Z", + "errorCode": null, + "errorMessage": null + } + ] }, - "expectationResults": [ - { - "name": "topic_sequence_match", - "actualValue": "Result C", - "expectedValue": "Result D", - "score": 0.5, - "result": "Failed", - "metricLabel": "Accuracy", - "metricExplainability": "Measures the correctness of the result.", - "status": "Completed", - "startTime": "2024-11-28T12:00:32Z", - "endTime": "2024-11-28T12:00:33Z", - "errorCode": null, - "errorMessage": null - } - ] - } - ] + { + "status": "ERROR", + "number": 2, + "startTime": "2024-11-28T12:00:30Z", + "endTime": "2024-11-28T12:00:40Z", + "generatedData": { + "type": "AGENT", + "actionsSequence": ["Action3", "Action4"], + "outcome": "Failure", + "topic": "Physics", + "inputTokensCount": 60, + "outputTokensCount": 50 + }, + "expectationResults": [ + { + "name": "topic_sequence_match", + "actualValue": "Result C", + "expectedValue": "Result D", + "score": 0.5, + "result": "Failed", + "metricLabel": "Accuracy", + "metricExplainability": "Measures the correctness of the result.", + "status": "Completed", + "startTime": "2024-11-28T12:00:32Z", + "endTime": "2024-11-28T12:00:33Z", + "errorCode": null, + "errorMessage": "Expected \"Result D\" but got \"Result C\"." + }, + { + "name": "topic_sequence_match", + "actualValue": "Result C", + "expectedValue": "Result D", + "score": 0.5, + "result": "Failed", + "metricLabel": "Accuracy", + "metricExplainability": "Measures the correctness of the result.", + "status": "Completed", + "startTime": "2024-11-28T12:00:32Z", + "endTime": "2024-11-28T12:00:33Z", + "errorCode": null, + "errorMessage": "Expected \"Result D\" but got \"Result C\"." + } + ] + } + ] + } } diff --git a/yarn.lock b/yarn.lock index 167cb53..765b31e 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1422,16 +1422,16 @@ resolved "https://registry.yarnpkg.com/@pkgjs/parseargs/-/parseargs-0.11.0.tgz#a77ea742fab25775145434eb1d2328cf5013ac33" integrity sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg== -"@salesforce/agents@^0.4.2": - version "0.4.2" - resolved "https://registry.yarnpkg.com/@salesforce/agents/-/agents-0.4.2.tgz#1caaf6a521cc3385a0411840876b77147aea880a" - integrity sha512-oTHEf3Dp6T8sYZzB/6V0HaZg7k/aZfTIFsoqYeewWpEa/KP0oopGHWTO9my15n1Guoq0qif6sWqX+RwWpqGgWw== +"@salesforce/agents@^0.5.1": + version "0.5.1" + resolved "https://registry.yarnpkg.com/@salesforce/agents/-/agents-0.5.1.tgz#b6de16004505432c226c02f612c6b0b7b6227f6f" + integrity sha512-FGpCQ3PVzZunoaQVPAJG05eqafOvf2P7fx2w5aZYVg9yqwM/UnBpTBKVvkmdZDsBRTUYaExr6tvboaMc5Hsfzw== dependencies: "@oclif/table" "^0.3.5" "@salesforce/core" "^8.8.0" "@salesforce/kit" "^3.2.3" "@salesforce/sf-plugins-core" "^12.1.0" - "@salesforce/source-deploy-retrieve" "^12.10.2" + "@salesforce/source-deploy-retrieve" "^12.10.3" fast-xml-parser "^4" nock "^13.5.6" @@ -1577,10 +1577,10 @@ cli-progress "^3.12.0" terminal-link "^3.0.0" -"@salesforce/source-deploy-retrieve@^12.10.2": - version "12.10.2" - resolved "https://registry.yarnpkg.com/@salesforce/source-deploy-retrieve/-/source-deploy-retrieve-12.10.2.tgz#c3737f3751f84cb4754b666edd83c014c91b87bb" - integrity sha512-V7V+nCxhJBcSEh5gJIZt44bsnYJ53HXt3Dec4gphu/Z8mu6Rr3b3yDC3Zw2EKyiDwYYxWvp0pjhqcO2xOTnNLw== +"@salesforce/source-deploy-retrieve@^12.10.3": + version "12.10.3" + resolved "https://registry.yarnpkg.com/@salesforce/source-deploy-retrieve/-/source-deploy-retrieve-12.10.3.tgz#fa16910781188877ffdfa5fde3a0318c0dfe3d07" + integrity sha512-bKIcN6VJajre2chF1xhPCjtR9gZpp8PrFFZ55UcWUMkoFAXscBPRJ7poAeorted3qMzS6wx+AuB27qYUCO+4iQ== dependencies: "@salesforce/core" "^8.8.0" "@salesforce/kit" "^3.2.2"