From aff7b5b77aa403a3f4d309304122ebae7f774408 Mon Sep 17 00:00:00 2001 From: Mike Donnalley Date: Fri, 13 Dec 2024 10:19:46 -0700 Subject: [PATCH 1/4] feat: add TAP format --- src/flags.ts | 7 +------ src/handleTestResults.ts | 12 ++++++++++-- ...-evaluations_runs_4KBSM000000003F4AQ_details.json | 2 ++ 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/flags.ts b/src/flags.ts index 0478cbf..1695172 100644 --- a/src/flags.ts +++ b/src/flags.ts @@ -11,12 +11,7 @@ Messages.importMessagesDirectoryFromMetaUrl(import.meta.url); const messages = Messages.loadMessages('@salesforce/plugin-agent', 'shared'); export const resultFormatFlag = Flags.option({ - options: [ - 'json', - 'human', - 'junit', - // 'tap', - ] as const, + options: ['json', 'human', 'junit', 'tap'] as const, default: 'human', summary: messages.getMessage('flags.result-format.summary'), }); diff --git a/src/handleTestResults.ts b/src/handleTestResults.ts index 0352916..1a6cceb 100644 --- a/src/handleTestResults.ts +++ b/src/handleTestResults.ts @@ -6,7 +6,7 @@ */ import { join } from 'node:path'; import { writeFile, mkdir } from 'node:fs/promises'; -import { AgentTestDetailsResponse, jsonFormat, humanFormat, junitFormat } from '@salesforce/agents'; +import { AgentTestDetailsResponse, jsonFormat, humanFormat, junitFormat, tapFormat } from '@salesforce/agents'; import { Ux } from '@salesforce/sf-plugins-core/Ux'; async function writeFileToDir(outputDir: string, fileName: string, content: string): Promise { @@ -24,7 +24,7 @@ export async function handleTestResults({ outputDir, }: { id: string; - format: 'human' | 'json' | 'junit'; + format: 'human' | 'json' | 'junit' | 'tap'; results: AgentTestDetailsResponse | undefined; jsonEnabled: boolean; outputDir?: string; @@ -59,4 +59,12 @@ export async function handleTestResults({ await writeFileToDir(outputDir, `test-result-${id}.xml`, formatted); } } + + if (format === 'tap') { + const formatted = await tapFormat(results); + ux.log(formatted); + if (outputDir) { + await writeFileToDir(outputDir, `test-result-${id}.txt`, formatted); + } + } } diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json index b895af0..5309372 100644 --- a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json +++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json @@ -3,6 +3,8 @@ "startTime": "2024-11-28T12:00:00Z", "endTime": "2024-11-28T12:05:00Z", "errorMessage": null, + "subjectName": "Copilot_for_Salesforce", + "testSetName": "CRM_Sanity_v1", "testCases": [ { "status": "COMPLETED", From 3aa02eef3655e990246b372b8d67f46409e2a20e Mon Sep 17 00:00:00 2001 From: Mike Donnalley Date: Fri, 13 Dec 2024 10:20:46 -0700 Subject: [PATCH 2/4] chore: regen schema --- schemas/agent-test-results.json | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/schemas/agent-test-results.json b/schemas/agent-test-results.json index 6914fb8..e67de4d 100644 --- a/schemas/agent-test-results.json +++ b/schemas/agent-test-results.json @@ -20,6 +20,12 @@ "errorMessage": { "type": "string" }, + "subjectName": { + "type": "string" + }, + "testSetName": { + "type": "string" + }, "testCases": { "type": "array", "items": { @@ -27,7 +33,7 @@ } } }, - "required": ["status", "startTime", "testCases"], + "required": ["status", "startTime", "subjectName", "testSetName", "testCases"], "additionalProperties": false }, "TestStatus": { From 0266d731bf89acf8b5e5e13b246de9952f56d241 Mon Sep 17 00:00:00 2001 From: Mike Donnalley Date: Mon, 16 Dec 2024 10:20:07 -0700 Subject: [PATCH 3/4] chore: bump agents --- package.json | 2 +- yarn.lock | 49 ++++++++++++------------------------------------- 2 files changed, 13 insertions(+), 38 deletions(-) diff --git a/package.json b/package.json index 593f408..2bf9395 100644 --- a/package.json +++ b/package.json @@ -10,7 +10,7 @@ "@inquirer/select": "^4.0.1", "@oclif/core": "^4", "@oclif/multi-stage-output": "^0.7.12", - "@salesforce/agents": "^0.4.2", + "@salesforce/agents": "^0.5.0", "@salesforce/core": "^8.8.0", "@salesforce/kit": "^3.2.1", "@salesforce/sf-plugins-core": "^12.1.0", diff --git a/yarn.lock b/yarn.lock index c7a3099..d64cc72 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1389,16 +1389,16 @@ resolved "https://registry.yarnpkg.com/@pkgjs/parseargs/-/parseargs-0.11.0.tgz#a77ea742fab25775145434eb1d2328cf5013ac33" integrity sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg== -"@salesforce/agents@^0.4.2": - version "0.4.2" - resolved "https://registry.yarnpkg.com/@salesforce/agents/-/agents-0.4.2.tgz#1caaf6a521cc3385a0411840876b77147aea880a" - integrity sha512-oTHEf3Dp6T8sYZzB/6V0HaZg7k/aZfTIFsoqYeewWpEa/KP0oopGHWTO9my15n1Guoq0qif6sWqX+RwWpqGgWw== +"@salesforce/agents@^0.5.0": + version "0.5.0" + resolved "https://registry.yarnpkg.com/@salesforce/agents/-/agents-0.5.0.tgz#f6e4106e4796dde6bc1c5d4045511fd7d27a4640" + integrity sha512-xmPCC3yOXFgsG0Mrt+BDRBVibJzHzBHlKws7szEeNY5it9g2rt0Knl/KzZZTDz9hGAkNCd94T4luCt653l7Pbg== dependencies: "@oclif/table" "^0.3.5" "@salesforce/core" "^8.8.0" "@salesforce/kit" "^3.2.3" "@salesforce/sf-plugins-core" "^12.1.0" - "@salesforce/source-deploy-retrieve" "^12.10.2" + "@salesforce/source-deploy-retrieve" "^12.10.3" fast-xml-parser "^4" nock "^13.5.6" @@ -1544,10 +1544,10 @@ cli-progress "^3.12.0" terminal-link "^3.0.0" -"@salesforce/source-deploy-retrieve@^12.10.2": - version "12.10.2" - resolved "https://registry.yarnpkg.com/@salesforce/source-deploy-retrieve/-/source-deploy-retrieve-12.10.2.tgz#c3737f3751f84cb4754b666edd83c014c91b87bb" - integrity sha512-V7V+nCxhJBcSEh5gJIZt44bsnYJ53HXt3Dec4gphu/Z8mu6Rr3b3yDC3Zw2EKyiDwYYxWvp0pjhqcO2xOTnNLw== +"@salesforce/source-deploy-retrieve@^12.10.3": + version "12.10.3" + resolved "https://registry.yarnpkg.com/@salesforce/source-deploy-retrieve/-/source-deploy-retrieve-12.10.3.tgz#fa16910781188877ffdfa5fde3a0318c0dfe3d07" + integrity sha512-bKIcN6VJajre2chF1xhPCjtR9gZpp8PrFFZ55UcWUMkoFAXscBPRJ7poAeorted3qMzS6wx+AuB27qYUCO+4iQ== dependencies: "@salesforce/core" "^8.8.0" "@salesforce/kit" "^3.2.2" @@ -7741,16 +7741,7 @@ stack-utils@^2.0.6: dependencies: escape-string-regexp "^2.0.0" -"string-width-cjs@npm:string-width@^4.2.0": - version "4.2.3" - resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" - integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== - dependencies: - emoji-regex "^8.0.0" - is-fullwidth-code-point "^3.0.0" - strip-ansi "^6.0.1" - -string-width@^4.0.0, string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3: +"string-width-cjs@npm:string-width@^4.2.0", string-width@^4.0.0, string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3: version "4.2.3" resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== @@ -7872,14 +7863,7 @@ string_decoder@~1.1.1: dependencies: safe-buffer "~5.1.0" -"strip-ansi-cjs@npm:strip-ansi@^6.0.1": - version "6.0.1" - resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" - integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== - dependencies: - ansi-regex "^5.0.1" - -strip-ansi@6.0.1, strip-ansi@^6.0.0, strip-ansi@^6.0.1: +"strip-ansi-cjs@npm:strip-ansi@^6.0.1", strip-ansi@6.0.1, strip-ansi@^6.0.0, strip-ansi@^6.0.1: version "6.0.1" resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== @@ -8546,7 +8530,7 @@ workerpool@^6.5.1: resolved "https://registry.yarnpkg.com/workerpool/-/workerpool-6.5.1.tgz#060f73b39d0caf97c6db64da004cd01b4c099544" integrity sha512-Fs4dNYcsdpYSAfVxhnl1L5zTksjvOJxtC5hzMNl+1t9B8hTJTdKDyZ5ju7ztgPy+ft9tBFXoOlDNiOT9WUXZlA== -"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0": +"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0: version "7.0.0" resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== @@ -8564,15 +8548,6 @@ wrap-ansi@^6.2.0: string-width "^4.1.0" strip-ansi "^6.0.0" -wrap-ansi@^7.0.0: - version "7.0.0" - resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" - integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== - dependencies: - ansi-styles "^4.0.0" - string-width "^4.1.0" - strip-ansi "^6.0.0" - wrap-ansi@^8.1.0: version "8.1.0" resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-8.1.0.tgz#56dc22368ee570face1b49819975d9b9a5ead214" From b9d79669d46563576da7cdda7944db4605d7bfe6 Mon Sep 17 00:00:00 2001 From: Mike Donnalley Date: Mon, 16 Dec 2024 10:38:44 -0700 Subject: [PATCH 4/4] chore: bump agents --- package.json | 2 +- schemas/agent-test-results.json | 25 ++- test/commands/agent/test/results.nut.ts | 2 +- ...tions_runs_4KBSM000000003F4AQ_details.json | 166 ++++++++++-------- yarn.lock | 8 +- 5 files changed, 113 insertions(+), 90 deletions(-) diff --git a/package.json b/package.json index 2bf9395..b8b58b1 100644 --- a/package.json +++ b/package.json @@ -10,7 +10,7 @@ "@inquirer/select": "^4.0.1", "@oclif/core": "^4", "@oclif/multi-stage-output": "^0.7.12", - "@salesforce/agents": "^0.5.0", + "@salesforce/agents": "^0.5.1", "@salesforce/core": "^8.8.0", "@salesforce/kit": "^3.2.1", "@salesforce/sf-plugins-core": "^12.1.0", diff --git a/schemas/agent-test-results.json b/schemas/agent-test-results.json index e67de4d..ae4e91f 100644 --- a/schemas/agent-test-results.json +++ b/schemas/agent-test-results.json @@ -23,17 +23,24 @@ "subjectName": { "type": "string" }, - "testSetName": { - "type": "string" - }, - "testCases": { - "type": "array", - "items": { - "$ref": "#/definitions/TestCaseResult" - } + "testSet": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "testCases": { + "type": "array", + "items": { + "$ref": "#/definitions/TestCaseResult" + } + } + }, + "required": ["name", "testCases"], + "additionalProperties": false } }, - "required": ["status", "startTime", "subjectName", "testSetName", "testCases"], + "required": ["status", "startTime", "subjectName", "testSet"], "additionalProperties": false }, "TestStatus": { diff --git a/test/commands/agent/test/results.nut.ts b/test/commands/agent/test/results.nut.ts index 5004e91..e59eaf3 100644 --- a/test/commands/agent/test/results.nut.ts +++ b/test/commands/agent/test/results.nut.ts @@ -47,7 +47,7 @@ describe('agent test results NUTs', () => { ).jsonOutput; expect(output?.result.status).to.equal('COMPLETED'); - expect(output?.result.testCases.length).to.equal(2); + expect(output?.result.testSet.testCases.length).to.equal(2); // check that cache does not have an entry const cache = await AgentTestCache.create(); diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json index 5309372..c154dbd 100644 --- a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json +++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json @@ -4,81 +4,97 @@ "endTime": "2024-11-28T12:05:00Z", "errorMessage": null, "subjectName": "Copilot_for_Salesforce", - "testSetName": "CRM_Sanity_v1", - "testCases": [ - { - "status": "COMPLETED", - "number": 1, - "startTime": "2024-11-28T12:00:10Z", - "endTime": "2024-11-28T12:00:20Z", - "generatedData": { - "type": "AGENT", - "actionsSequence": ["Action1", "Action2"], - "outcome": "Success", - "topic": "Mathematics", - "inputTokensCount": 50, - "outputTokensCount": 55 - }, - "expectationResults": [ - { - "name": "topic_sequence_match", - "actualValue": "Result A", - "expectedValue": "Result A", - "score": 1.0, - "result": "Passed", - "metricLabel": "Accuracy", - "metricExplainability": "Measures the correctness of the result.", - "status": "Completed", - "startTime": "2024-11-28T12:00:12Z", - "endTime": "2024-11-28T12:00:13Z", - "errorCode": null, - "errorMessage": null + "testSet": { + "name": "CRM_Sanity_v1", + "testCases": [ + { + "status": "COMPLETED", + "number": 1, + "startTime": "2024-11-28T12:00:10Z", + "endTime": "2024-11-28T12:00:20Z", + "generatedData": { + "type": "AGENT", + "actionsSequence": ["Action1", "Action2"], + "outcome": "Success", + "topic": "Mathematics", + "inputTokensCount": 50, + "outputTokensCount": 55 }, - { - "name": "action_sequence_match", - "actualValue": "Result B", - "expectedValue": "Result B", - "score": 0.9, - "result": "Passed", - "metricLabel": "Precision", - "metricExplainability": "Measures the precision of the result.", - "status": "Completed", - "startTime": "2024-11-28T12:00:14Z", - "endTime": "2024-11-28T12:00:15Z", - "errorCode": null, - "errorMessage": null - } - ] - }, - { - "status": "ERROR", - "number": 2, - "startTime": "2024-11-28T12:00:30Z", - "endTime": "2024-11-28T12:00:40Z", - "generatedData": { - "type": "AGENT", - "actionsSequence": ["Action3", "Action4"], - "outcome": "Failure", - "topic": "Physics", - "inputTokensCount": 60, - "outputTokensCount": 50 + "expectationResults": [ + { + "name": "topic_sequence_match", + "actualValue": "Result A", + "expectedValue": "Result A", + "score": 1.0, + "result": "Passed", + "metricLabel": "Accuracy", + "metricExplainability": "Measures the correctness of the result.", + "status": "Completed", + "startTime": "2024-11-28T12:00:12Z", + "endTime": "2024-11-28T12:00:13Z", + "errorCode": null, + "errorMessage": null + }, + { + "name": "action_sequence_match", + "actualValue": "Result B", + "expectedValue": "Result B", + "score": 0.9, + "result": "Passed", + "metricLabel": "Precision", + "metricExplainability": "Measures the precision of the result.", + "status": "Completed", + "startTime": "2024-11-28T12:00:14Z", + "endTime": "2024-11-28T12:00:15Z", + "errorCode": null, + "errorMessage": null + } + ] }, - "expectationResults": [ - { - "name": "topic_sequence_match", - "actualValue": "Result C", - "expectedValue": "Result D", - "score": 0.5, - "result": "Failed", - "metricLabel": "Accuracy", - "metricExplainability": "Measures the correctness of the result.", - "status": "Completed", - "startTime": "2024-11-28T12:00:32Z", - "endTime": "2024-11-28T12:00:33Z", - "errorCode": null, - "errorMessage": null - } - ] - } - ] + { + "status": "ERROR", + "number": 2, + "startTime": "2024-11-28T12:00:30Z", + "endTime": "2024-11-28T12:00:40Z", + "generatedData": { + "type": "AGENT", + "actionsSequence": ["Action3", "Action4"], + "outcome": "Failure", + "topic": "Physics", + "inputTokensCount": 60, + "outputTokensCount": 50 + }, + "expectationResults": [ + { + "name": "topic_sequence_match", + "actualValue": "Result C", + "expectedValue": "Result D", + "score": 0.5, + "result": "Failed", + "metricLabel": "Accuracy", + "metricExplainability": "Measures the correctness of the result.", + "status": "Completed", + "startTime": "2024-11-28T12:00:32Z", + "endTime": "2024-11-28T12:00:33Z", + "errorCode": null, + "errorMessage": "Expected \"Result D\" but got \"Result C\"." + }, + { + "name": "topic_sequence_match", + "actualValue": "Result C", + "expectedValue": "Result D", + "score": 0.5, + "result": "Failed", + "metricLabel": "Accuracy", + "metricExplainability": "Measures the correctness of the result.", + "status": "Completed", + "startTime": "2024-11-28T12:00:32Z", + "endTime": "2024-11-28T12:00:33Z", + "errorCode": null, + "errorMessage": "Expected \"Result D\" but got \"Result C\"." + } + ] + } + ] + } } diff --git a/yarn.lock b/yarn.lock index d64cc72..1d9e383 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1389,10 +1389,10 @@ resolved "https://registry.yarnpkg.com/@pkgjs/parseargs/-/parseargs-0.11.0.tgz#a77ea742fab25775145434eb1d2328cf5013ac33" integrity sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg== -"@salesforce/agents@^0.5.0": - version "0.5.0" - resolved "https://registry.yarnpkg.com/@salesforce/agents/-/agents-0.5.0.tgz#f6e4106e4796dde6bc1c5d4045511fd7d27a4640" - integrity sha512-xmPCC3yOXFgsG0Mrt+BDRBVibJzHzBHlKws7szEeNY5it9g2rt0Knl/KzZZTDz9hGAkNCd94T4luCt653l7Pbg== +"@salesforce/agents@^0.5.1": + version "0.5.1" + resolved "https://registry.yarnpkg.com/@salesforce/agents/-/agents-0.5.1.tgz#b6de16004505432c226c02f612c6b0b7b6227f6f" + integrity sha512-FGpCQ3PVzZunoaQVPAJG05eqafOvf2P7fx2w5aZYVg9yqwM/UnBpTBKVvkmdZDsBRTUYaExr6tvboaMc5Hsfzw== dependencies: "@oclif/table" "^0.3.5" "@salesforce/core" "^8.8.0"