Merge pull request #20 from salesforcecli/mdonnalley/agent-testing

feat: implement agent test commands
salesforcecli · Dec 5, 2024 · 1ae2231 · 1ae2231
2 parents 0e65c4c + 4702468
commit 1ae2231
Show file tree

Hide file tree

Showing 35 changed files with 1,226 additions and 191 deletions.
diff --git a/command-snapshot.json b/command-snapshot.json
@@ -32,15 +32,31 @@
     "command": "agent:test:cancel",
     "flagAliases": [],
     "flagChars": ["i", "o", "r"],
-    "flags": ["flags-dir", "job-id", "json", "target-org", "use-most-recent"],
+    "flags": ["api-version", "flags-dir", "job-id", "json", "target-org", "use-most-recent"],
+    "plugin": "@salesforce/plugin-agent"
+  },
+  {
+    "alias": [],
+    "command": "agent:test:results",
+    "flagAliases": [],
+    "flagChars": ["i", "o"],
+    "flags": ["api-version", "flags-dir", "job-id", "json", "result-format", "target-org"],
+    "plugin": "@salesforce/plugin-agent"
+  },
+  {
+    "alias": [],
+    "command": "agent:test:resume",
+    "flagAliases": [],
+    "flagChars": ["i", "o", "r", "w"],
+    "flags": ["api-version", "flags-dir", "job-id", "json", "result-format", "target-org", "use-most-recent", "wait"],
     "plugin": "@salesforce/plugin-agent"
   },
   {
     "alias": [],
     "command": "agent:test:run",
     "flagAliases": [],
-    "flagChars": ["d", "i", "o", "w"],
-    "flags": ["flags-dir", "id", "json", "output-dir", "target-org", "wait"],
+    "flagChars": ["n", "o", "w"],
+    "flags": ["api-version", "flags-dir", "json", "name", "result-format", "target-org", "wait"],
     "plugin": "@salesforce/plugin-agent"
   }
 ]
diff --git a/messages/agent.test.cancel.md b/messages/agent.test.cancel.md
@@ -6,7 +6,7 @@ Cancel a running test for an Agent.
 
 Cancel a running test for an Agent, providing the AiEvaluation ID.
 
-# flags.id.summary
+# flags.job-id.summary
 
 The AiEvaluation ID.
 
@@ -18,4 +18,4 @@ Use the job ID of the most recent test evaluation.
 
 - Cancel a test for an Agent:
 
-  <%= config.bin %> <%= command.id %> --id AiEvalId
+  <%= config.bin %> <%= command.id %> --job-id AiEvalId
diff --git a/messages/agent.test.results.md b/messages/agent.test.results.md
@@ -0,0 +1,19 @@
+# summary
+
+Get the results of a test evaluation.
+
+# description
+
+Provide the AiEvaluation ID to get the results of a test evaluation.
+
+# flags.job-id.summary
+
+The AiEvaluation ID.
+
+# flags.use-most-recent.summary
+
+Use the job ID of the most recent test evaluation.
+
+# examples
+
+- <%= config.bin %> <%= command.id %> --job-id AiEvalId
diff --git a/messages/agent.test.resume.md b/messages/agent.test.resume.md
@@ -0,0 +1,29 @@
+# summary
+
+Resume a running test for an Agent.
+
+# description
+
+Resume a running test for an Agent, providing the AiEvaluation ID.
+
+# flags.job-id.summary
+
+The AiEvaluation ID.
+
+# flags.use-most-recent.summary
+
+Use the job ID of the most recent test evaluation.
+
+# flags.wait.summary
+
+Number of minutes to wait for the command to complete and display results to the terminal window.
+
+# flags.wait.description
+
+If the command continues to run after the wait period, the CLI returns control of the terminal window to you.
+
+# examples
+
+- Resume a test for an Agent:
+
+  <%= config.bin %> <%= command.id %> --job-id AiEvalId
diff --git a/messages/agent.test.run.md b/messages/agent.test.run.md
@@ -6,13 +6,13 @@ Start a test for an Agent.
 
 Start a test for an Agent, providing the AiEvalDefinitionVersion ID. Returns the job ID.
 
-# flags.id.summary
+# flags.name.summary
 
-The AiEvalDefinitionVersion ID.
+The name of the AiEvaluationDefinition to start.
 
-# flags.id.description
+# flags.name.description
 
-The AiEvalDefinitionVersion ID.
+The name of the AiEvaluationDefinition to start.
 
 # flags.wait.summary
 
@@ -22,12 +22,8 @@ Number of minutes to wait for the command to complete and display results to the
 
 If the command continues to run after the wait period, the CLI returns control of the terminal window to you.
 
-# flags.output-dir.summary
-
-Directory in which to store test run files.
-
 # examples
 
 - Start a test for an Agent:
 
-  <%= config.bin %> <%= command.id %> --id AiEvalDefVerId
+  <%= config.bin %> <%= command.id %> --name AiEvalDefinitionName
diff --git a/messages/shared.md b/messages/shared.md
@@ -0,0 +1,3 @@
+# flags.result-format.summary
+
+Format of the test run results.
diff --git a/package.json b/package.json
@@ -9,15 +9,16 @@
     "@inquirer/input": "^4.0.1",
     "@inquirer/select": "^4.0.1",
     "@oclif/core": "^4",
-    "@salesforce/agents": "^0.2.4",
     "@oclif/multi-stage-output": "^0.7.12",
-    "@salesforce/core": "^8.5.2",
+    "@salesforce/agents": "^0.3.0",
+    "@salesforce/core": "^8.8.0",
     "@salesforce/kit": "^3.2.1",
-    "@salesforce/sf-plugins-core": "^12",
+    "@salesforce/sf-plugins-core": "^12.1.0",
     "ansis": "^3.3.2"
   },
   "devDependencies": {
     "@oclif/plugin-command-snapshot": "^5.2.19",
+    "@oclif/test": "^4.1.0",
     "@salesforce/cli-plugins-testkit": "^5.3.35",
     "@salesforce/dev-scripts": "^10.2.10",
     "@salesforce/plugin-command-reference": "^3.1.29",
@@ -59,7 +60,16 @@
     ],
     "topics": {
       "agent": {
-        "description": "Commands to work with agents."
+        "description": "Commands to work with agents.",
+        "external": true,
+        "subtopics": {
+          "test": {
+            "external": true
+          },
+          "generate": {
+            "external": true
+          }
+        }
       }
     },
     "flexibleTaxonomy": true

diff --git a/schemas/agent-create.json b/schemas/agent-create.json
@@ -12,10 +12,8 @@
           "type": "string"
         }
       },
-      "required": [
-        "isSuccess"
-      ],
+      "required": ["isSuccess"],
       "additionalProperties": false
     }
   }
-}
+}
diff --git a/schemas/agent-generate-spec.json b/schemas/agent-generate-spec.json
@@ -15,10 +15,8 @@
           "type": "string"
         }
       },
-      "required": [
-        "isSuccess"
-      ],
+      "required": ["isSuccess"],
       "additionalProperties": false
     }
   }
-}
+}
diff --git a/schemas/agent-test-cancel.json b/schemas/agent-test-cancel.json
@@ -5,7 +5,7 @@
     "AgentTestCancelResult": {
       "type": "object",
       "properties": {
-        "jobId": {
+        "aiEvaluationId": {
           "type": "string"
         },
         "success": {
@@ -18,11 +18,8 @@
           "type": "string"
         }
       },
-      "required": [
-        "jobId",
-        "success"
-      ],
+      "required": ["aiEvaluationId", "success"],
       "additionalProperties": false
     }
   }
-}
+}
diff --git a/schemas/agent-test-results.json b/schemas/agent-test-results.json
@@ -0,0 +1,145 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$ref": "#/definitions/AgentTestResultsResult",
+  "definitions": {
+    "AgentTestResultsResult": {
+      "$ref": "#/definitions/AgentTestDetailsResponse"
+    },
+    "AgentTestDetailsResponse": {
+      "type": "object",
+      "properties": {
+        "status": {
+          "$ref": "#/definitions/TestStatus"
+        },
+        "startTime": {
+          "type": "string"
+        },
+        "endTime": {
+          "type": "string"
+        },
+        "errorMessage": {
+          "type": "string"
+        },
+        "testCases": {
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/TestCaseResult"
+          }
+        }
+      },
+      "required": ["status", "startTime", "testCases"],
+      "additionalProperties": false
+    },
+    "TestStatus": {
+      "type": "string",
+      "enum": ["NEW", "IN_PROGRESS", "COMPLETED", "ERROR"]
+    },
+    "TestCaseResult": {
+      "type": "object",
+      "properties": {
+        "status": {
+          "$ref": "#/definitions/TestStatus"
+        },
+        "number": {
+          "type": "string"
+        },
+        "startTime": {
+          "type": "string"
+        },
+        "endTime": {
+          "type": "string"
+        },
+        "generatedData": {
+          "type": "object",
+          "properties": {
+            "type": {
+              "type": "string",
+              "const": "AGENT"
+            },
+            "actionsSequence": {
+              "type": "array",
+              "items": {
+                "type": "string"
+              }
+            },
+            "outcome": {
+              "type": "string",
+              "enum": ["Success", "Failure"]
+            },
+            "topic": {
+              "type": "string"
+            },
+            "inputTokensCount": {
+              "type": "string"
+            },
+            "outputTokensCount": {
+              "type": "string"
+            }
+          },
+          "required": ["type", "actionsSequence", "outcome", "topic", "inputTokensCount", "outputTokensCount"],
+          "additionalProperties": false
+        },
+        "expectationResults": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "name": {
+                "type": "string"
+              },
+              "actualValue": {
+                "type": "string"
+              },
+              "expectedValue": {
+                "type": "string"
+              },
+              "score": {
+                "type": "number"
+              },
+              "result": {
+                "type": "string",
+                "enum": ["Passed", "Failed"]
+              },
+              "metricLabel": {
+                "type": "string",
+                "enum": ["Accuracy", "Precision"]
+              },
+              "metricExplainability": {
+                "type": "string"
+              },
+              "status": {
+                "$ref": "#/definitions/TestStatus"
+              },
+              "startTime": {
+                "type": "string"
+              },
+              "endTime": {
+                "type": "string"
+              },
+              "errorCode": {
+                "type": "string"
+              },
+              "errorMessage": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "name",
+              "actualValue",
+              "expectedValue",
+              "score",
+              "result",
+              "metricLabel",
+              "metricExplainability",
+              "status",
+              "startTime"
+            ],
+            "additionalProperties": false
+          }
+        }
+      },
+      "required": ["status", "number", "startTime", "generatedData", "expectationResults"],
+      "additionalProperties": false
+    }
+  }
+}
diff --git a/schemas/agent-test-resume.json b/schemas/agent-test-resume.json
@@ -0,0 +1,19 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$ref": "#/definitions/AgentTestResumeResult",
+  "definitions": {
+    "AgentTestResumeResult": {
+      "type": "object",
+      "properties": {
+        "aiEvaluationId": {
+          "type": "string"
+        },
+        "status": {
+          "type": "string"
+        }
+      },
+      "required": ["aiEvaluationId", "status"],
+      "additionalProperties": false
+    }
+  }
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		# flags.result-format.summary

		Format of the test run results.