Skip to content

Commit

Permalink
Merge pull request #20 from salesforcecli/mdonnalley/agent-testing
Browse files Browse the repository at this point in the history
feat: implement agent test commands
  • Loading branch information
mdonnalley authored Dec 5, 2024
2 parents 0e65c4c + 4702468 commit 1ae2231
Show file tree
Hide file tree
Showing 35 changed files with 1,226 additions and 191 deletions.
22 changes: 19 additions & 3 deletions command-snapshot.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,31 @@
"command": "agent:test:cancel",
"flagAliases": [],
"flagChars": ["i", "o", "r"],
"flags": ["flags-dir", "job-id", "json", "target-org", "use-most-recent"],
"flags": ["api-version", "flags-dir", "job-id", "json", "target-org", "use-most-recent"],
"plugin": "@salesforce/plugin-agent"
},
{
"alias": [],
"command": "agent:test:results",
"flagAliases": [],
"flagChars": ["i", "o"],
"flags": ["api-version", "flags-dir", "job-id", "json", "result-format", "target-org"],
"plugin": "@salesforce/plugin-agent"
},
{
"alias": [],
"command": "agent:test:resume",
"flagAliases": [],
"flagChars": ["i", "o", "r", "w"],
"flags": ["api-version", "flags-dir", "job-id", "json", "result-format", "target-org", "use-most-recent", "wait"],
"plugin": "@salesforce/plugin-agent"
},
{
"alias": [],
"command": "agent:test:run",
"flagAliases": [],
"flagChars": ["d", "i", "o", "w"],
"flags": ["flags-dir", "id", "json", "output-dir", "target-org", "wait"],
"flagChars": ["n", "o", "w"],
"flags": ["api-version", "flags-dir", "json", "name", "result-format", "target-org", "wait"],
"plugin": "@salesforce/plugin-agent"
}
]
4 changes: 2 additions & 2 deletions messages/agent.test.cancel.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Cancel a running test for an Agent.

Cancel a running test for an Agent, providing the AiEvaluation ID.

# flags.id.summary
# flags.job-id.summary

The AiEvaluation ID.

Expand All @@ -18,4 +18,4 @@ Use the job ID of the most recent test evaluation.

- Cancel a test for an Agent:

<%= config.bin %> <%= command.id %> --id AiEvalId
<%= config.bin %> <%= command.id %> --job-id AiEvalId
19 changes: 19 additions & 0 deletions messages/agent.test.results.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# summary

Get the results of a test evaluation.

# description

Provide the AiEvaluation ID to get the results of a test evaluation.

# flags.job-id.summary

The AiEvaluation ID.

# flags.use-most-recent.summary

Use the job ID of the most recent test evaluation.

# examples

- Get the results of a test evaluation:

<%= config.bin %> <%= command.id %> --job-id AiEvalId
29 changes: 29 additions & 0 deletions messages/agent.test.resume.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# summary

Resume a running test for an Agent.

# description

Resume a running test for an Agent, providing the AiEvaluation ID.

# flags.job-id.summary

The AiEvaluation ID.

# flags.use-most-recent.summary

Use the job ID of the most recent test evaluation.

# flags.wait.summary

Number of minutes to wait for the command to complete and display results to the terminal window.

# flags.wait.description

If the command continues to run after the wait period, the CLI returns control of the terminal window to you.

# examples

- Resume a test for an Agent:

<%= config.bin %> <%= command.id %> --job-id AiEvalId
14 changes: 5 additions & 9 deletions messages/agent.test.run.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ Start a test for an Agent.

Start a test for an Agent, providing the name of the AiEvaluationDefinition. Returns the job ID.

# flags.id.summary
# flags.name.summary

The AiEvalDefinitionVersion ID.
The name of the AiEvaluationDefinition to start.

# flags.id.description
# flags.name.description

The AiEvalDefinitionVersion ID.
The name of the AiEvaluationDefinition to start.

# flags.wait.summary

Expand All @@ -22,12 +22,8 @@ Number of minutes to wait for the command to complete and display results to the

If the command continues to run after the wait period, the CLI returns control of the terminal window to you.

# flags.output-dir.summary

Directory in which to store test run files.

# examples

- Start a test for an Agent:

<%= config.bin %> <%= command.id %> --id AiEvalDefVerId
<%= config.bin %> <%= command.id %> --name AiEvalDefinitionName
3 changes: 3 additions & 0 deletions messages/shared.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# flags.result-format.summary

Format of the test run results.
18 changes: 14 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,16 @@
"@inquirer/input": "^4.0.1",
"@inquirer/select": "^4.0.1",
"@oclif/core": "^4",
"@salesforce/agents": "^0.2.4",
"@oclif/multi-stage-output": "^0.7.12",
"@salesforce/core": "^8.5.2",
"@salesforce/agents": "^0.3.0",
"@salesforce/core": "^8.8.0",
"@salesforce/kit": "^3.2.1",
"@salesforce/sf-plugins-core": "^12",
"@salesforce/sf-plugins-core": "^12.1.0",
"ansis": "^3.3.2"
},
"devDependencies": {
"@oclif/plugin-command-snapshot": "^5.2.19",
"@oclif/test": "^4.1.0",
"@salesforce/cli-plugins-testkit": "^5.3.35",
"@salesforce/dev-scripts": "^10.2.10",
"@salesforce/plugin-command-reference": "^3.1.29",
Expand Down Expand Up @@ -59,7 +60,16 @@
],
"topics": {
"agent": {
"description": "Commands to work with agents."
"description": "Commands to work with agents.",
"external": true,
"subtopics": {
"test": {
"external": true
},
"generate": {
"external": true
}
}
}
},
"flexibleTaxonomy": true
Expand Down
6 changes: 2 additions & 4 deletions schemas/agent-create.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,8 @@
"type": "string"
}
},
"required": [
"isSuccess"
],
"required": ["isSuccess"],
"additionalProperties": false
}
}
}
}
6 changes: 2 additions & 4 deletions schemas/agent-generate-spec.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,8 @@
"type": "string"
}
},
"required": [
"isSuccess"
],
"required": ["isSuccess"],
"additionalProperties": false
}
}
}
}
9 changes: 3 additions & 6 deletions schemas/agent-test-cancel.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"AgentTestCancelResult": {
"type": "object",
"properties": {
"jobId": {
"aiEvaluationId": {
"type": "string"
},
"success": {
Expand All @@ -18,11 +18,8 @@
"type": "string"
}
},
"required": [
"jobId",
"success"
],
"required": ["aiEvaluationId", "success"],
"additionalProperties": false
}
}
}
}
145 changes: 145 additions & 0 deletions schemas/agent-test-results.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$ref": "#/definitions/AgentTestResultsResult",
"definitions": {
"AgentTestResultsResult": {
"$ref": "#/definitions/AgentTestDetailsResponse"
},
"AgentTestDetailsResponse": {
"type": "object",
"properties": {
"status": {
"$ref": "#/definitions/TestStatus"
},
"startTime": {
"type": "string"
},
"endTime": {
"type": "string"
},
"errorMessage": {
"type": "string"
},
"testCases": {
"type": "array",
"items": {
"$ref": "#/definitions/TestCaseResult"
}
}
},
"required": ["status", "startTime", "testCases"],
"additionalProperties": false
},
"TestStatus": {
"type": "string",
"enum": ["NEW", "IN_PROGRESS", "COMPLETED", "ERROR"]
},
"TestCaseResult": {
"type": "object",
"properties": {
"status": {
"$ref": "#/definitions/TestStatus"
},
"number": {
"type": "string"
},
"startTime": {
"type": "string"
},
"endTime": {
"type": "string"
},
"generatedData": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "AGENT"
},
"actionsSequence": {
"type": "array",
"items": {
"type": "string"
}
},
"outcome": {
"type": "string",
"enum": ["Success", "Failure"]
},
"topic": {
"type": "string"
},
"inputTokensCount": {
"type": "string"
},
"outputTokensCount": {
"type": "string"
}
},
"required": ["type", "actionsSequence", "outcome", "topic", "inputTokensCount", "outputTokensCount"],
"additionalProperties": false
},
"expectationResults": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"actualValue": {
"type": "string"
},
"expectedValue": {
"type": "string"
},
"score": {
"type": "number"
},
"result": {
"type": "string",
"enum": ["Passed", "Failed"]
},
"metricLabel": {
"type": "string",
"enum": ["Accuracy", "Precision"]
},
"metricExplainability": {
"type": "string"
},
"status": {
"$ref": "#/definitions/TestStatus"
},
"startTime": {
"type": "string"
},
"endTime": {
"type": "string"
},
"errorCode": {
"type": "string"
},
"errorMessage": {
"type": "string"
}
},
"required": [
"name",
"actualValue",
"expectedValue",
"score",
"result",
"metricLabel",
"metricExplainability",
"status",
"startTime"
],
"additionalProperties": false
}
}
},
"required": ["status", "number", "startTime", "generatedData", "expectationResults"],
"additionalProperties": false
}
}
}
19 changes: 19 additions & 0 deletions schemas/agent-test-resume.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$ref": "#/definitions/AgentTestResumeResult",
"definitions": {
"AgentTestResumeResult": {
"type": "object",
"properties": {
"aiEvaluationId": {
"type": "string"
},
"status": {
"type": "string"
}
},
"required": ["aiEvaluationId", "status"],
"additionalProperties": false
}
}
}
Loading

0 comments on commit 1ae2231

Please sign in to comment.