diff --git a/command-snapshot.json b/command-snapshot.json
index 176d7c7..7ad7dc9 100644
--- a/command-snapshot.json
+++ b/command-snapshot.json
@@ -32,15 +32,31 @@
     "command": "agent:test:cancel",
     "flagAliases": [],
     "flagChars": ["i", "o", "r"],
-    "flags": ["flags-dir", "job-id", "json", "target-org", "use-most-recent"],
+    "flags": ["api-version", "flags-dir", "job-id", "json", "target-org", "use-most-recent"],
+    "plugin": "@salesforce/plugin-agent"
+  },
+  {
+    "alias": [],
+    "command": "agent:test:results",
+    "flagAliases": [],
+    "flagChars": ["i", "o"],
+    "flags": ["api-version", "flags-dir", "job-id", "json", "result-format", "target-org"],
+    "plugin": "@salesforce/plugin-agent"
+  },
+  {
+    "alias": [],
+    "command": "agent:test:resume",
+    "flagAliases": [],
+    "flagChars": ["i", "o", "r", "w"],
+    "flags": ["api-version", "flags-dir", "job-id", "json", "result-format", "target-org", "use-most-recent", "wait"],
     "plugin": "@salesforce/plugin-agent"
   },
   {
     "alias": [],
     "command": "agent:test:run",
     "flagAliases": [],
-    "flagChars": ["d", "i", "o", "w"],
-    "flags": ["flags-dir", "id", "json", "output-dir", "target-org", "wait"],
+    "flagChars": ["n", "o", "w"],
+    "flags": ["api-version", "flags-dir", "json", "name", "result-format", "target-org", "wait"],
     "plugin": "@salesforce/plugin-agent"
   }
 ]
diff --git a/messages/agent.test.cancel.md b/messages/agent.test.cancel.md
index 5d6f229..f9f8e91 100644
--- a/messages/agent.test.cancel.md
+++ b/messages/agent.test.cancel.md
@@ -6,7 +6,7 @@ Cancel a running test for an Agent.
 
 Cancel a running test for an Agent, providing the AiEvaluation ID.
 
-# flags.id.summary
+# flags.job-id.summary
 
 The AiEvaluation ID.
 
@@ -18,4 +18,4 @@ Use the job ID of the most recent test evaluation.
 
 - Cancel a test for an Agent:
 
-  <%= config.bin %> <%= command.id %> --id AiEvalId
+  <%= config.bin %> <%= command.id %> --job-id AiEvalId
diff --git a/messages/agent.test.results.md b/messages/agent.test.results.md
new file mode 100644
index 0000000..b46b851
--- /dev/null
+++ b/messages/agent.test.results.md
@@ -0,0 +1,19 @@
+# summary
+
+Get the results of a test evaluation.
+
+# description
+
+Provide the AiEvaluation ID to get the results of a test evaluation.
+
+# flags.job-id.summary
+
+The AiEvaluation ID.
+
+# flags.use-most-recent.summary
+
+Use the job ID of the most recent test evaluation.
+
+# examples
+
+- <%= config.bin %> <%= command.id %> --job-id AiEvalId
diff --git a/messages/agent.test.resume.md b/messages/agent.test.resume.md
new file mode 100644
index 0000000..9c7122f
--- /dev/null
+++ b/messages/agent.test.resume.md
@@ -0,0 +1,29 @@
+# summary
+
+Resume a running test for an Agent.
+
+# description
+
+Resume a running test for an Agent, providing the AiEvaluation ID.
+
+# flags.job-id.summary
+
+The AiEvaluation ID.
+
+# flags.use-most-recent.summary
+
+Use the job ID of the most recent test evaluation.
+
+# flags.wait.summary
+
+Number of minutes to wait for the command to complete and display results to the terminal window.
+
+# flags.wait.description
+
+If the command continues to run after the wait period, the CLI returns control of the terminal window to you.
+
+# examples
+
+- Resume a test for an Agent:
+
+  <%= config.bin %> <%= command.id %> --job-id AiEvalId
diff --git a/messages/agent.test.run.md b/messages/agent.test.run.md
index 4b73b25..d187b98 100644
--- a/messages/agent.test.run.md
+++ b/messages/agent.test.run.md
@@ -6,13 +6,13 @@ Start a test for an Agent.
 
 Start a test for an Agent, providing the AiEvalDefinitionVersion ID. Returns the job ID.
 
-# flags.id.summary
+# flags.name.summary
 
-The AiEvalDefinitionVersion ID.
+The name of the AiEvaluationDefinition to start.
 
-# flags.id.description
+# flags.name.description
 
-The AiEvalDefinitionVersion ID.
+The name of the AiEvaluationDefinition to start.
 
 # flags.wait.summary
 
@@ -22,12 +22,8 @@ Number of minutes to wait for the command to complete and display results to the
 
 If the command continues to run after the wait period, the CLI returns control of the terminal window to you.
 
-# flags.output-dir.summary
-
-Directory in which to store test run files.
-
 # examples
 
 - Start a test for an Agent:
 
-  <%= config.bin %> <%= command.id %> --id AiEvalDefVerId
+  <%= config.bin %> <%= command.id %> --name AiEvaluationDefinitionName
diff --git a/messages/shared.md b/messages/shared.md
new file mode 100644
index 0000000..af9268c
--- /dev/null
+++ b/messages/shared.md
@@ -0,0 +1,3 @@
+# flags.result-format.summary
+
+Format of the test run results.
diff --git a/package.json b/package.json
index f97069a..15f4ea5 100644
--- a/package.json
+++ b/package.json
@@ -9,15 +9,16 @@
     "@inquirer/input": "^4.0.1",
     "@inquirer/select": "^4.0.1",
     "@oclif/core": "^4",
-    "@salesforce/agents": "^0.2.4",
     "@oclif/multi-stage-output": "^0.7.12",
-    "@salesforce/core": "^8.5.2",
+    "@salesforce/agents": "^0.3.0",
+    "@salesforce/core": "^8.8.0",
     "@salesforce/kit": "^3.2.1",
-    "@salesforce/sf-plugins-core": "^12",
+    "@salesforce/sf-plugins-core": "^12.1.0",
     "ansis": "^3.3.2"
   },
   "devDependencies": {
     "@oclif/plugin-command-snapshot": "^5.2.19",
+    "@oclif/test": "^4.1.0",
     "@salesforce/cli-plugins-testkit": "^5.3.35",
     "@salesforce/dev-scripts": "^10.2.10",
     "@salesforce/plugin-command-reference": "^3.1.29",
@@ -59,7 +60,16 @@
     ],
     "topics": {
       "agent": {
-        "description": "Commands to work with agents."
+        "description": "Commands to work with agents.",
+        "external": true,
+        "subtopics": {
+          "test": {
+            "external": true
+          },
+          "generate": {
+            "external": true
+          }
+        }
       }
     },
     "flexibleTaxonomy": true
diff --git a/schemas/agent-create.json b/schemas/agent-create.json
index 9196a7f..eb28296 100644
--- a/schemas/agent-create.json
+++ b/schemas/agent-create.json
@@ -12,10 +12,8 @@
           "type": "string"
         }
       },
-      "required": [
-        "isSuccess"
-      ],
+      "required": ["isSuccess"],
       "additionalProperties": false
     }
   }
-}
\ No newline at end of file
+}
diff --git a/schemas/agent-generate-spec.json b/schemas/agent-generate-spec.json
index 38d003f..51bb4c2 100644
--- a/schemas/agent-generate-spec.json
+++ b/schemas/agent-generate-spec.json
@@ -15,10 +15,8 @@
           "type": "string"
         }
       },
-      "required": [
-        "isSuccess"
-      ],
+      "required": ["isSuccess"],
       "additionalProperties": false
     }
   }
-}
\ No newline at end of file
+}
diff --git a/schemas/agent-test-cancel.json b/schemas/agent-test-cancel.json
index 283df86..bf11239 100644
--- a/schemas/agent-test-cancel.json
+++ b/schemas/agent-test-cancel.json
@@ -5,7 +5,7 @@
     "AgentTestCancelResult": {
       "type": "object",
       "properties": {
-        "jobId": {
+        "aiEvaluationId": {
           "type": "string"
         },
         "success": {
@@ -18,11 +18,8 @@
           "type": "string"
         }
       },
-      "required": [
-        "jobId",
-        "success"
-      ],
+      "required": ["aiEvaluationId", "success"],
       "additionalProperties": false
     }
   }
-}
\ No newline at end of file
+}
diff --git a/schemas/agent-test-results.json b/schemas/agent-test-results.json
new file mode 100644
index 0000000..6914fb8
--- /dev/null
+++ b/schemas/agent-test-results.json
@@ -0,0 +1,145 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$ref": "#/definitions/AgentTestResultsResult",
+  "definitions": {
+    "AgentTestResultsResult": {
+      "$ref": "#/definitions/AgentTestDetailsResponse"
+    },
+    "AgentTestDetailsResponse": {
+      "type": "object",
+      "properties": {
+        "status": {
+          "$ref": "#/definitions/TestStatus"
+        },
+        "startTime": {
+          "type": "string"
+        },
+        "endTime": {
+          "type": "string"
+        },
+        "errorMessage": {
+          "type": "string"
+        },
+        "testCases": {
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/TestCaseResult"
+          }
+        }
+      },
+      "required": ["status", "startTime", "testCases"],
+      "additionalProperties": false
+    },
+    "TestStatus": {
+      "type": "string",
+      "enum": ["NEW", "IN_PROGRESS", "COMPLETED", "ERROR"]
+    },
+    "TestCaseResult": {
+      "type": "object",
+      "properties": {
+        "status": {
+          "$ref": "#/definitions/TestStatus"
+        },
+        "number": {
+          "type": "string"
+        },
+        "startTime": {
+          "type": "string"
+        },
+        "endTime": {
+          "type": "string"
+        },
+        "generatedData": {
+          "type": "object",
+          "properties": {
+            "type": {
+              "type": "string",
+              "const": "AGENT"
+            },
+            "actionsSequence": {
+              "type": "array",
+              "items": {
+                "type": "string"
+              }
+            },
+            "outcome": {
+              "type": "string",
+              "enum": ["Success", "Failure"]
+            },
+            "topic": {
+              "type": "string"
+            },
+            "inputTokensCount": {
+              "type": "string"
+            },
+            "outputTokensCount": {
+              "type": "string"
+            }
+          },
+          "required": ["type", "actionsSequence", "outcome", "topic", "inputTokensCount", "outputTokensCount"],
+          "additionalProperties": false
+        },
+        "expectationResults": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "name": {
+                "type": "string"
+              },
+              "actualValue": {
+                "type": "string"
+              },
+              "expectedValue": {
+                "type": "string"
+              },
+              "score": {
+                "type": "number"
+              },
+              "result": {
+                "type": "string",
+                "enum": ["Passed", "Failed"]
+              },
+              "metricLabel": {
+                "type": "string",
+                "enum": ["Accuracy", "Precision"]
+              },
+              "metricExplainability": {
+                "type": "string"
+              },
+              "status": {
+                "$ref": "#/definitions/TestStatus"
+              },
+              "startTime": {
+                "type": "string"
+              },
+              "endTime": {
+                "type": "string"
+              },
+              "errorCode": {
+                "type": "string"
+              },
+              "errorMessage": {
+                "type": "string"
+              }
+            },
+            "required": [
+              "name",
+              "actualValue",
+              "expectedValue",
+              "score",
+              "result",
+              "metricLabel",
+              "metricExplainability",
+              "status",
+              "startTime"
+            ],
+            "additionalProperties": false
+          }
+        }
+      },
+      "required": ["status", "number", "startTime", "generatedData", "expectationResults"],
+      "additionalProperties": false
+    }
+  }
+}
diff --git a/schemas/agent-test-resume.json b/schemas/agent-test-resume.json
new file mode 100644
index 0000000..5f86d12
--- /dev/null
+++ b/schemas/agent-test-resume.json
@@ -0,0 +1,19 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$ref": "#/definitions/AgentTestResumeResult",
+  "definitions": {
+    "AgentTestResumeResult": {
+      "type": "object",
+      "properties": {
+        "aiEvaluationId": {
+          "type": "string"
+        },
+        "status": {
+          "type": "string"
+        }
+      },
+      "required": ["aiEvaluationId", "status"],
+      "additionalProperties": false
+    }
+  }
+}
diff --git a/schemas/agent-test-run.json b/schemas/agent-test-run.json
index 1fc7379..284c121 100644
--- a/schemas/agent-test-run.json
+++ b/schemas/agent-test-run.json
@@ -5,24 +5,15 @@
     "AgentTestRunResult": {
       "type": "object",
       "properties": {
-        "jobId": {
+        "aiEvaluationId": {
           "type": "string"
         },
-        "success": {
-          "type": "boolean"
-        },
-        "errorCode": {
-          "type": "string"
-        },
-        "message": {
+        "status": {
           "type": "string"
         }
       },
-      "required": [
-        "jobId",
-        "success"
-      ],
+      "required": ["aiEvaluationId", "status"],
       "additionalProperties": false
     }
   }
-}
\ No newline at end of file
+}
diff --git a/src/agentTestCache.ts b/src/agentTestCache.ts
new file mode 100644
index 0000000..bd06c85
--- /dev/null
+++ b/src/agentTestCache.ts
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2024, salesforce.com, inc.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+
+import { Global, SfError, TTLConfig } from '@salesforce/core';
+import { Duration } from '@salesforce/kit';
+
+type CacheContents = {
+  aiEvaluationId: string;
+  name: string;
+};
+
+export class AgentTestCache extends TTLConfig<TTLConfig.Options, CacheContents> {
+  public static getFileName(): string {
+    return 'agent-test-cache.json';
+  }
+
+  public static getDefaultOptions(): TTLConfig.Options {
+    return {
+      isGlobal: true,
+      isState: true,
+      filename: AgentTestCache.getFileName(),
+      stateFolder: Global.SF_STATE_FOLDER,
+      ttl: Duration.days(7),
+    };
+  }
+
+  public async createCacheEntry(aiEvaluationId: string, name: string): Promise<void> {
+    if (!aiEvaluationId) throw new SfError('aiEvaluationId is required to create a cache entry');
+
+    this.set(aiEvaluationId, { aiEvaluationId, name });
+    await this.write();
+  }
+
+  public async removeCacheEntry(aiEvaluationId: string): Promise<void> {
+    if (!aiEvaluationId) throw new SfError('aiEvaluationId is required to remove a cache entry');
+
+    this.unset(aiEvaluationId);
+    await this.write();
+  }
+
+  public resolveFromCache(): CacheContents {
+    const key = this.getLatestKey();
+    if (!key) throw new SfError('Could not find an aiEvaluationId to resume');
+
+    return this.get(key);
+  }
+
+  public useIdOrMostRecent(
+    aiEvaluationId: string | undefined,
+    useMostRecent: boolean
+  ): { aiEvaluationId: string; name?: string } {
+    if (aiEvaluationId && useMostRecent) {
+      throw new SfError('Cannot specify both an aiEvaluationId and use most recent flag');
+    }
+
+    if (!aiEvaluationId && !useMostRecent) {
+      throw new SfError('Must specify either an aiEvaluationId or use most recent flag');
+    }
+
+    if (aiEvaluationId) {
+      return { aiEvaluationId };
+    }
+
+    return this.resolveFromCache();
+  }
+}
diff --git a/src/commands/agent/test/cancel.ts b/src/commands/agent/test/cancel.ts
index 676545b..118ab35 100644
--- a/src/commands/agent/test/cancel.ts
+++ b/src/commands/agent/test/cancel.ts
@@ -7,12 +7,14 @@
 
 import { SfCommand, Flags } from '@salesforce/sf-plugins-core';
 import { Messages } from '@salesforce/core';
+import { AgentTester } from '@salesforce/agents';
+import { AgentTestCache } from '../../../agentTestCache.js';
 
 Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
 const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.test.cancel');
 
 export type AgentTestCancelResult = {
-  jobId: string; // AiEvaluation.Id
+  aiEvaluationId: string;
   success: boolean;
   errorCode?: string;
   message?: string;
@@ -22,38 +24,41 @@ export default class AgentTestCancel extends SfCommand<AgentTestCancelResult> {
   public static readonly summary = messages.getMessage('summary');
   public static readonly description = messages.getMessage('description');
   public static readonly examples = messages.getMessages('examples');
-  public static state = 'beta';
+  public static readonly state = 'beta';
 
   public static readonly flags = {
     'target-org': Flags.requiredOrg(),
+    'api-version': Flags.orgApiVersion(),
     'job-id': Flags.string({
       char: 'i',
-      required: true,
-      summary: messages.getMessage('flags.id.summary'),
+      summary: messages.getMessage('flags.job-id.summary'),
+      exactlyOne: ['use-most-recent', 'job-id'],
     }),
     'use-most-recent': Flags.boolean({
       char: 'r',
       summary: messages.getMessage('flags.use-most-recent.summary'),
       exactlyOne: ['use-most-recent', 'job-id'],
     }),
-    //
-    // Future flags:
-    //   ??? api-version ???
   };
 
   public async run(): Promise<AgentTestCancelResult> {
     const { flags } = await this.parse(AgentTestCancel);
 
-    this.log(`Canceling tests for AiEvaluation Job: ${flags['job-id']}`);
+    const agentTestCache = await AgentTestCache.create();
+    const { aiEvaluationId } = agentTestCache.useIdOrMostRecent(flags['job-id'], flags['use-most-recent']);
+
+    this.log(`Canceling tests for AiEvaluation Job: ${aiEvaluationId}`);
 
-    // Call SF Eval Connect API passing AiEvaluation.Id
-    // POST to /einstein/ai-evaluations/{aiEvaluationId}/stop
+    const agentTester = new AgentTester(flags['target-org'].getConnection(flags['api-version']));
+    const result = await agentTester.cancel(aiEvaluationId);
 
-    // Returns: AiEvaluation.Id
+    if (result.success) {
+      await agentTestCache.removeCacheEntry(aiEvaluationId);
+    }
 
     return {
-      success: true,
-      jobId: '4KBSM000000003F4AQ', // AiEvaluation.Id
+      success: result.success,
+      aiEvaluationId,
     };
   }
 }
diff --git a/src/commands/agent/test/results.ts b/src/commands/agent/test/results.ts
new file mode 100644
index 0000000..5443fc4
--- /dev/null
+++ b/src/commands/agent/test/results.ts
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2024, salesforce.com, inc.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+
+import { SfCommand, Flags } from '@salesforce/sf-plugins-core';
+import { Messages } from '@salesforce/core';
+import { AgentTester, AgentTestDetailsResponse, humanFormat } from '@salesforce/agents';
+import { resultFormatFlag } from '../../../flags.js';
+
+Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
+const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.test.results');
+
+export type AgentTestResultsResult = AgentTestDetailsResponse;
+
+export default class AgentTestResults extends SfCommand<AgentTestResultsResult> {
+  public static readonly summary = messages.getMessage('summary');
+  public static readonly description = messages.getMessage('description');
+  public static readonly examples = messages.getMessages('examples');
+  public static readonly state = 'beta';
+
+  public static readonly flags = {
+    'target-org': Flags.requiredOrg(),
+    'api-version': Flags.orgApiVersion(),
+    'job-id': Flags.string({
+      summary: messages.getMessage('flags.job-id.summary'),
+      char: 'i',
+      required: true,
+    }),
+    'result-format': resultFormatFlag(),
+  };
+
+  public async run(): Promise<AgentTestResultsResult> {
+    const { flags } = await this.parse(AgentTestResults);
+
+    const agentTester = new AgentTester(flags['target-org'].getConnection(flags['api-version']));
+    const response = await agentTester.details(flags['job-id']);
+    if (flags['result-format'] === 'human') {
+      this.log(await humanFormat(flags['job-id'], response));
+    }
+    return response;
+  }
+}
diff --git a/src/commands/agent/test/resume.ts b/src/commands/agent/test/resume.ts
new file mode 100644
index 0000000..2087612
--- /dev/null
+++ b/src/commands/agent/test/resume.ts
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2023, salesforce.com, inc.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+
+import { SfCommand, Flags } from '@salesforce/sf-plugins-core';
+import { Messages } from '@salesforce/core';
+import { AgentTester, humanFormat } from '@salesforce/agents';
+import { AgentTestCache } from '../../../agentTestCache.js';
+import { TestStages } from '../../../testStages.js';
+import { resultFormatFlag } from '../../../flags.js';
+
+Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
+const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.test.resume');
+
+export type AgentTestResumeResult = {
+  aiEvaluationId: string;
+  status: string;
+};
+
+export default class AgentTestResume extends SfCommand<AgentTestResumeResult> {
+  public static readonly summary = messages.getMessage('summary');
+  public static readonly description = messages.getMessage('description');
+  public static readonly examples = messages.getMessages('examples');
+  public static readonly state = 'beta';
+
+  public static readonly flags = {
+    'target-org': Flags.requiredOrg(),
+    'api-version': Flags.orgApiVersion(),
+    'job-id': Flags.string({
+      char: 'i',
+      summary: messages.getMessage('flags.job-id.summary'),
+      exactlyOne: ['use-most-recent', 'job-id'],
+    }),
+    'use-most-recent': Flags.boolean({
+      char: 'r',
+      summary: messages.getMessage('flags.use-most-recent.summary'),
+      exactlyOne: ['use-most-recent', 'job-id'],
+    }),
+    wait: Flags.duration({
+      char: 'w',
+      unit: 'minutes',
+      min: 1,
+      defaultValue: 5,
+      summary: messages.getMessage('flags.wait.summary'),
+      description: messages.getMessage('flags.wait.description'),
+    }),
+    'result-format': resultFormatFlag(),
+  };
+
+  /** Re-attaches to an existing test run, polls it for up to `--wait`, and reports the resulting status. */
+  public async run(): Promise<AgentTestResumeResult> {
+    const { flags } = await this.parse(AgentTestResume);
+    const agentTestCache = await AgentTestCache.create();
+    const { name, aiEvaluationId } = agentTestCache.useIdOrMostRecent(flags['job-id'], flags['use-most-recent']);
+
+    const mso = new TestStages({
+      title: `Agent Test Run: ${name ?? aiEvaluationId}`,
+      jsonEnabled: this.jsonEnabled(),
+    });
+    mso.start({ id: aiEvaluationId });
+    const agentTester = new AgentTester(flags['target-org'].getConnection(flags['api-version']));
+
+    const { completed, response } = await mso.poll(agentTester, aiEvaluationId, flags.wait);
+    if (completed) await agentTestCache.removeCacheEntry(aiEvaluationId);
+
+    mso.stop();
+
+    if (response && flags['result-format'] === 'human') {
+      this.log(await humanFormat(name ?? aiEvaluationId, response));
+    }
+
+    return {
+      status: response?.status ?? 'IN_PROGRESS', // no response => client timed out; presumably unfinished
+      aiEvaluationId,
+    };
+  }
+}
diff --git a/src/commands/agent/test/run.ts b/src/commands/agent/test/run.ts
index c78c812..1b2a2c9 100644
--- a/src/commands/agent/test/run.ts
+++ b/src/commands/agent/test/run.ts
@@ -7,32 +7,38 @@
 
 import { SfCommand, Flags } from '@salesforce/sf-plugins-core';
 import { Messages } from '@salesforce/core';
+import { AgentTester, humanFormat } from '@salesforce/agents';
+import { colorize } from '@oclif/core/ux';
+import { resultFormatFlag } from '../../../flags.js';
+import { AgentTestCache } from '../../../agentTestCache.js';
+import { TestStages } from '../../../testStages.js';
 
 Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
 const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.test.run');
 
+// TODO: this should include details and status
 export type AgentTestRunResult = {
-  jobId: string; // AiEvaluation.Id
-  success: boolean;
-  errorCode?: string;
-  message?: string;
+  aiEvaluationId: string;
+  status: string;
 };
 
 export default class AgentTestRun extends SfCommand<AgentTestRunResult> {
   public static readonly summary = messages.getMessage('summary');
   public static readonly description = messages.getMessage('description');
   public static readonly examples = messages.getMessages('examples');
-  public static state = 'beta';
+  public static readonly state = 'beta';
 
   public static readonly flags = {
     'target-org': Flags.requiredOrg(),
-    // AiEvalDefinitionVersion.Id -- This should really be "test-name"
-    id: Flags.string({
-      char: 'i',
+    'api-version': Flags.orgApiVersion(),
+    name: Flags.string({
+      char: 'n',
       required: true,
-      summary: messages.getMessage('flags.id.summary'),
-      description: messages.getMessage('flags.id.description'),
+      summary: messages.getMessage('flags.name.summary'),
+      description: messages.getMessage('flags.name.description'),
     }),
+    // we want to pass `undefined` to the API
+    // eslint-disable-next-line sf-plugin/flag-min-max-default
     wait: Flags.duration({
       char: 'w',
       unit: 'minutes',
@@ -40,31 +46,46 @@ export default class AgentTestRun extends SfCommand<AgentTestRunResult> {
       summary: messages.getMessage('flags.wait.summary'),
       description: messages.getMessage('flags.wait.description'),
     }),
-    'output-dir': Flags.directory({
-      char: 'd',
-      summary: messages.getMessage('flags.output-dir.summary'),
-    }),
-    //
-    // Future flags:
-    //   result-format [csv, json, table, junit, TAP]
-    //   suites [array of suite names]
-    //   verbose [boolean]
-    //   ??? api-version or build-version ???
+    'result-format': resultFormatFlag(),
   };
 
   public async run(): Promise<AgentTestRunResult> {
     const { flags } = await this.parse(AgentTestRun);
 
-    this.log(`Starting tests for AiEvalDefinitionVersion: ${flags.id}`);
+    const mso = new TestStages({ title: `Agent Test Run: ${flags.name}`, jsonEnabled: this.jsonEnabled() });
+    mso.start();
+
+    const agentTester = new AgentTester(flags['target-org'].getConnection(flags['api-version']));
+    const response = await agentTester.start(flags.name);
+
+    mso.update({ id: response.aiEvaluationId });
+    // Cache the job so `agent test resume/cancel --use-most-recent` can look it up later.
+    const agentTestCache = await AgentTestCache.create();
+    await agentTestCache.createCacheEntry(response.aiEvaluationId, flags.name);
+
+    if (flags.wait?.minutes) {
+      const { completed, response: detailsResponse } = await mso.poll(agentTester, response.aiEvaluationId, flags.wait);
+      if (completed) await agentTestCache.removeCacheEntry(response.aiEvaluationId);
 
-    // Call SF Eval Connect API passing AiEvalDefinitionVersion.Id
-    // POST to /einstein/ai-evaluations/{aiEvalDefinitionVersionId}/start
+      mso.stop();
 
-    // Returns: AiEvaluation.Id
+      if (detailsResponse && flags['result-format'] === 'human') {
+        this.log(await humanFormat(flags.name, detailsResponse));
+      }
+      return {
+        status: detailsResponse?.status ?? 'IN_PROGRESS', // no details => client timed out; presumably unfinished
+        aiEvaluationId: response.aiEvaluationId,
+      };
+    } else {
+      mso.stop();
+      this.log(
+        `Run ${colorize(
+          'dim',
+          `sf agent test resume --job-id ${response.aiEvaluationId}`
+        )} to resume watching this test.`
+      );
+    }
 
-    return {
-      success: true,
-      jobId: '4KBSM000000003F4AQ', // AiEvaluation.Id; needed for getting status and stopping
-    };
+    return response;
   }
 }
diff --git a/src/flags.ts b/src/flags.ts
new file mode 100644
index 0000000..418e308
--- /dev/null
+++ b/src/flags.ts
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2024, salesforce.com, inc.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+import { Flags } from '@salesforce/sf-plugins-core';
+import { Messages } from '@salesforce/core';
+
+Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
+const messages = Messages.loadMessages('@salesforce/plugin-agent', 'shared');
+
+export const resultFormatFlag = Flags.option({
+  options: [
+    'json',
+    'human',
+    // 'tap',
+    // 'junit'
+  ] as const,
+  default: 'human',
+  summary: messages.getMessage('flags.result-format.summary'),
+});
diff --git a/src/testStages.ts b/src/testStages.ts
new file mode 100644
index 0000000..8c0f9ec
--- /dev/null
+++ b/src/testStages.ts
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2024, salesforce.com, inc.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+
+import { colorize } from '@oclif/core/ux';
+import { MultiStageOutput } from '@oclif/multi-stage-output';
+import { AgentTestDetailsResponse, AgentTester } from '@salesforce/agents';
+import { Lifecycle } from '@salesforce/core';
+import { Duration } from '@salesforce/kit';
+import { Ux } from '@salesforce/sf-plugins-core';
+
+type Data = {
+  id: string;
+  status: string;
+  totalTestCases: number;
+  passingTestCases: number;
+  failingTestCases: number;
+};
+
+const isTimeoutError = (e: unknown): e is { name: 'PollingClientTimeout' } =>
+  (e as { name: string })?.name === 'PollingClientTimeout';
+
+export class TestStages {
+  private mso: MultiStageOutput<Data>;
+  private ux: Ux;
+
+  /**
+   * Builds the progress display: two stages, status/count rows for the polling stage, and a trailing Job ID row.
+   *
+   * `title` becomes the MultiStageOutput title; `jsonEnabled` is forwarded to both the Ux logger and the display.
+   */
+  public constructor({ title, jsonEnabled }: { title: string; jsonEnabled: boolean }) {
+    this.ux = new Ux({ jsonEnabled });
+    this.mso = new MultiStageOutput<Data>({
+      title,
+      jsonEnabled,
+      stages: ['Starting Tests', 'Polling for Test Results'],
+      stageSpecificBlock: [
+        {
+          stage: 'Polling for Test Results',
+          type: 'dynamic-key-value',
+          label: 'Status',
+          get: (data): string | undefined => data?.status,
+        },
+        {
+          stage: 'Polling for Test Results',
+          type: 'dynamic-key-value',
+          label: 'Completed Tests',
+          // Compare the counts against undefined, not truthiness: 0 passing or 0 failing must still render.
+          get: (data): string | undefined =>
+            data?.totalTestCases && data?.passingTestCases !== undefined && data?.failingTestCases !== undefined
+              ? `${data?.passingTestCases + data?.failingTestCases}/${data?.totalTestCases}`
+              : undefined,
+        },
+        {
+          stage: 'Polling for Test Results',
+          type: 'dynamic-key-value',
+          label: 'Passing Tests',
+          get: (data): string | undefined => data?.passingTestCases?.toString(),
+        },
+        {
+          stage: 'Polling for Test Results',
+          type: 'dynamic-key-value',
+          label: 'Failing Tests',
+          get: (data): string | undefined => data?.failingTestCases?.toString(),
+        },
+      ],
+      postStagesBlock: [{ type: 'dynamic-key-value', label: 'Job ID', get: (data): string | undefined => data?.id }],
+    });
+  }
+
+  public start(data?: Partial<Data>): void {
+    this.mso.skipTo('Starting Tests', data);
+  }
+
+  /** Polls for results until done or `wait` elapses; a client timeout yields `{ completed: false }`. */
+  public async poll(
+    agentTester: AgentTester,
+    id: string,
+    wait: Duration
+  ): Promise<{ completed: boolean; response?: AgentTestDetailsResponse }> {
+    this.mso.skipTo('Polling for Test Results');
+    const lifecycle = Lifecycle.getInstance();
+    lifecycle.on(
+      'AGENT_TEST_POLLING_EVENT',
+      async (event: {
+        status: string;
+        completedTestCases: number;
+        totalTestCases: number;
+        failingTestCases: number;
+        passingTestCases: number;
+      }) => Promise.resolve(this.update(event))
+    );
+
+    try {
+      const response = await agentTester.poll(id, { timeout: wait });
+      this.stop();
+      return { completed: true, response };
+    } catch (e) {
+      if (isTimeoutError(e)) {
+        this.stop('async');
+        this.ux.log(`Client timed out after ${wait.minutes} minutes.`);
+        this.ux.log(`Run ${colorize('dim', `sf agent test resume --job-id ${id}`)} to resume watching this test.`);
+        return { completed: false }; // not finished: callers must keep the cached job so it can be resumed
+      }
+      this.error();
+      throw e;
+    }
+  }
+
+  public update(data: Partial<Data>): void {
+    this.mso.updateData(data);
+  }
+
+  public stop(finalStatus?: 'async'): void {
+    this.mso.stop(finalStatus);
+  }
+
+  public error(): void {
+    this.mso.error();
+  }
+
+  public done(data?: Partial<Data>): void {
+    this.mso.skipTo('Done', data);
+  }
+}
diff --git a/test/agentTestCache.test.ts b/test/agentTestCache.test.ts
new file mode 100644
index 0000000..11d758b
--- /dev/null
+++ b/test/agentTestCache.test.ts
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2024, salesforce.com, inc.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+import { expect } from 'chai';
+import { SfError } from '@salesforce/core';
+import sinon from 'sinon';
+import { AgentTestCache } from '../src/agentTestCache.js';
+
+describe('AgentTestCache', () => {
+  let cache: AgentTestCache;
+
+  beforeEach(() => {
+    cache = new AgentTestCache(AgentTestCache.getDefaultOptions());
+  });
+
+  afterEach(() => {
+    sinon.restore();
+  });
+
+  describe('createCacheEntry', () => {
+    it('should create a cache entry', async () => {
+      const writeStub = sinon.stub(cache, 'write').resolves();
+      await cache.createCacheEntry('123', 'testName');
+      const entry = cache.get('123');
+      expect(entry.aiEvaluationId).to.equal('123');
+      expect(entry.name).to.equal('testName');
+      expect(writeStub.calledOnce).to.be.true;
+    });
+
+    it('should throw an error if aiEvaluationId is not provided', async () => {
+      // Assert outside the try/catch so the test fails when nothing is thrown.
+      let err: unknown;
+      try {
+        await cache.createCacheEntry('', 'testName');
+      } catch (e) {
+        err = e;
+      }
+      expect(err).to.be.instanceOf(SfError);
+      expect((err as SfError).message).to.equal('aiEvaluationId is required to create a cache entry');
+    });
+  });
+
+  describe('removeCacheEntry', () => {
+    it('should remove a cache entry', async () => {
+      const writeStub = sinon.stub(cache, 'write').resolves();
+      await cache.createCacheEntry('123', 'testName');
+      await cache.removeCacheEntry('123');
+      expect(cache.get('123')).to.be.undefined;
+      expect(writeStub.calledTwice).to.be.true;
+    });
+
+    it('should throw an error if aiEvaluationId is not provided', async () => {
+      // Assert outside the try/catch so the test fails when nothing is thrown.
+      let err: unknown;
+      try {
+        await cache.removeCacheEntry('');
+      } catch (e) {
+        err = e;
+      }
+      expect(err).to.be.instanceOf(SfError);
+      expect((err as SfError).message).to.equal('aiEvaluationId is required to remove a cache entry');
+    });
+  });
+
+  describe('resolveFromCache', () => {
+    it('should resolve the most recent cache entry', async () => {
+      // Stub write() as the sibling tests do; createCacheEntry calls it (see the calledOnce assertion above).
+      sinon.stub(cache, 'write').resolves();
+      sinon.stub(cache, 'getLatestKey').returns('123');
+      await cache.createCacheEntry('123', 'testName');
+      const result = cache.resolveFromCache();
+      expect(result.aiEvaluationId).to.equal('123');
+      expect(result.name).to.equal('testName');
+    });
+
+    it('should throw an error if no cache entry is found', () => {
+      sinon.stub(cache, 'getLatestKey').returns(undefined);
+      // `.to.throw` fails the test when nothing is thrown, unlike a bare try/catch.
+      expect(() => cache.resolveFromCache()).to.throw(SfError, 'Could not find an aiEvaluationId to resume');
+    });
+  });
+
+  describe('useIdOrMostRecent', () => {
+    it('should return the provided aiEvaluationId', () => {
+      const result = cache.useIdOrMostRecent('123', false);
+      expect(result).to.deep.equal({ aiEvaluationId: '123' });
+    });
+
+    it('should return the most recent cache entry', async () => {
+      sinon.stub(cache, 'resolveFromCache').returns({ aiEvaluationId: '123', name: 'testName' });
+      const result = cache.useIdOrMostRecent(undefined, true);
+      expect(result).to.deep.equal({ aiEvaluationId: '123', name: 'testName' });
+    });
+
+    it('should throw an error if both aiEvaluationId and useMostRecent are provided', () => {
+      expect(() => cache.useIdOrMostRecent('123', true)).to.throw(
+        SfError,
+        'Cannot specify both an aiEvaluationId and use most recent flag'
+      );
+    });
+
+    it('should throw an error if neither aiEvaluationId nor useMostRecent are provided', () => {
+      expect(() => cache.useIdOrMostRecent(undefined, false)).to.throw(
+        SfError,
+        'Must specify either an aiEvaluationId or use most recent flag'
+      );
+    });
+  });
+});
diff --git a/test/commands/agent/test/cancel.nut.ts b/test/commands/agent/test/cancel.nut.ts
new file mode 100644
index 0000000..b6d5671
--- /dev/null
+++ b/test/commands/agent/test/cancel.nut.ts
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2024, salesforce.com, inc.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+import { resolve } from 'node:path';
+import { execCmd, TestSession } from '@salesforce/cli-plugins-testkit';
+import { expect } from 'chai';
+import { AgentTestRunResult } from '../../../../src/commands/agent/test/run.js';
+import { AgentTestCancelResult } from '../../../../src/commands/agent/test/cancel.js';
+import { AgentTestCache } from '../../../../src/agentTestCache.js';
+
+describe('agent test cancel NUTs', () => {
+  const mocksPath = resolve('test/mocks');
+  let testSession: TestSession;
+
+  before(async () => {
+    testSession = await TestSession.create({
+      project: { name: 'agentTestRun' },
+      devhubAuthStrategy: 'AUTO',
+    });
+  });
+
+  after(async () => {
+    await testSession?.clean();
+  });
+
+  it('should cancel async test run', async () => {
+    const started = execCmd<AgentTestRunResult>(
+      `agent test run --name my_agent_tests --target-org ${testSession.hubOrg.username} --json`,
+      {
+        env: { ...process.env, SF_MOCK_DIR: mocksPath },
+        ensureExitCode: 0,
+      }
+    ).jsonOutput;
+
+    expect(started?.result.aiEvaluationId).to.be.ok;
+
+    const cancelled = execCmd<AgentTestCancelResult>(
+      `agent test cancel --job-id ${started?.result.aiEvaluationId} --target-org ${testSession.hubOrg.username} --json`,
+      {
+        env: { ...process.env, SF_MOCK_DIR: mocksPath },
+        ensureExitCode: 0,
+      }
+    ).jsonOutput;
+
+    expect(cancelled?.result.success).to.be.true;
+    expect(cancelled?.result.aiEvaluationId).to.equal('4KBSM000000003F4AQ');
+
+    // after the cancel, the cache should have no run left to resolve
+    const testCache = await AgentTestCache.create();
+    expect(() => testCache.resolveFromCache()).to.throw('Could not find an aiEvaluationId to resume');
+  });
+});
diff --git a/test/commands/agent/test/results.nut.ts b/test/commands/agent/test/results.nut.ts
new file mode 100644
index 0000000..5004e91
--- /dev/null
+++ b/test/commands/agent/test/results.nut.ts
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2024, salesforce.com, inc.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+import { resolve } from 'node:path';
+import { execCmd, TestSession } from '@salesforce/cli-plugins-testkit';
+import { expect } from 'chai';
+import { AgentTestRunResult } from '../../../../src/commands/agent/test/run.js';
+import { AgentTestResultsResult } from '../../../../src/commands/agent/test/results.js';
+import { AgentTestCache } from '../../../../src/agentTestCache.js';
+
+describe('agent test results NUTs', () => {
+  const mocksPath = resolve('test/mocks');
+  let testSession: TestSession;
+
+  before(async () => {
+    testSession = await TestSession.create({
+      project: { name: 'agentTestRun' },
+      devhubAuthStrategy: 'AUTO',
+    });
+  });
+
+  after(async () => {
+    await testSession?.clean();
+  });
+
+  it('should get results of completed test run', async () => {
+    const finished = execCmd<AgentTestRunResult>(
+      `agent test run --name my_agent_tests --target-org ${testSession.hubOrg.username} --wait 5 --json`,
+      {
+        env: { ...process.env, SF_MOCK_DIR: mocksPath },
+        ensureExitCode: 0,
+      }
+    ).jsonOutput;
+
+    expect(finished?.result.aiEvaluationId).to.be.ok;
+    expect(finished?.result.status).to.equal('COMPLETED');
+
+    const fetched = execCmd<AgentTestResultsResult>(
+      `agent test results --job-id ${finished?.result.aiEvaluationId} --target-org ${testSession.hubOrg.username} --json`,
+      {
+        env: { ...process.env, SF_MOCK_DIR: mocksPath },
+        ensureExitCode: 0,
+      }
+    ).jsonOutput;
+
+    expect(fetched?.result.status).to.equal('COMPLETED');
+    expect(fetched?.result.testCases.length).to.equal(2);
+
+    // the run completed, so the cache should have no run left to resolve
+    const testCache = await AgentTestCache.create();
+    expect(() => testCache.resolveFromCache()).to.throw('Could not find an aiEvaluationId to resume');
+  });
+});
diff --git a/test/commands/agent/test/resume.nut.ts b/test/commands/agent/test/resume.nut.ts
new file mode 100644
index 0000000..bc84d67
--- /dev/null
+++ b/test/commands/agent/test/resume.nut.ts
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2024, salesforce.com, inc.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+import { resolve } from 'node:path';
+import { execCmd, TestSession } from '@salesforce/cli-plugins-testkit';
+import { expect } from 'chai';
+import { AgentTestRunResult } from '../../../../src/commands/agent/test/run.js';
+import { AgentTestResumeResult } from '../../../../src/commands/agent/test/resume.js';
+import { AgentTestCache } from '../../../../src/agentTestCache.js';
+
+describe('agent test resume NUTs', () => {
+  const mocksPath = resolve('test/mocks');
+  let testSession: TestSession;
+
+  before(async () => {
+    testSession = await TestSession.create({
+      project: { name: 'agentTestRun' },
+      devhubAuthStrategy: 'AUTO',
+    });
+  });
+
+  after(async () => {
+    await testSession?.clean();
+  });
+
+  it('should resume async test run', async () => {
+    const started = execCmd<AgentTestRunResult>(
+      `agent test run --name my_agent_tests --target-org ${testSession.hubOrg.username} --json`,
+      {
+        env: { ...process.env, SF_MOCK_DIR: mocksPath },
+        ensureExitCode: 0,
+      }
+    ).jsonOutput;
+
+    expect(started?.result.aiEvaluationId).to.be.ok;
+
+    const resumed = execCmd<AgentTestResumeResult>(
+      `agent test resume --job-id ${started?.result.aiEvaluationId} --target-org ${testSession.hubOrg.username} --json`,
+      {
+        env: { ...process.env, SF_MOCK_DIR: mocksPath },
+        ensureExitCode: 0,
+      }
+    ).jsonOutput;
+
+    expect(resumed?.result.status).to.equal('COMPLETED');
+    expect(resumed?.result.aiEvaluationId).to.equal('4KBSM000000003F4AQ');
+
+    // once resumed to completion, the cache should have no run left to resolve
+    const testCache = await AgentTestCache.create();
+    expect(() => testCache.resolveFromCache()).to.throw('Could not find an aiEvaluationId to resume');
+  });
+});
diff --git a/test/commands/agent/test/run.nut.ts b/test/commands/agent/test/run.nut.ts
new file mode 100644
index 0000000..13c3a99
--- /dev/null
+++ b/test/commands/agent/test/run.nut.ts
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2024, salesforce.com, inc.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+import { resolve } from 'node:path';
+import { execCmd, TestSession } from '@salesforce/cli-plugins-testkit';
+import { expect } from 'chai';
+import { AgentTestRunResult } from '../../../../src/commands/agent/test/run.js';
+import { AgentTestCache } from '../../../../src/agentTestCache.js';
+
+describe('agent test run NUTs', () => {
+  const mocksPath = resolve('test/mocks');
+  let testSession: TestSession;
+
+  before(async () => {
+    testSession = await TestSession.create({
+      project: { name: 'agentTestRun' },
+      devhubAuthStrategy: 'AUTO',
+    });
+  });
+
+  after(async () => {
+    await testSession?.clean();
+  });
+
+  it('should start async test run', async () => {
+    const testName = 'my_agent_tests';
+    const cli = `agent test run --name ${testName} --target-org ${testSession.hubOrg.username} --json`;
+    const started = execCmd<AgentTestRunResult>(cli, {
+      env: { ...process.env, SF_MOCK_DIR: mocksPath },
+      ensureExitCode: 0,
+    }).jsonOutput;
+    expect(started?.result.status).to.equal('NEW');
+    expect(started?.result.aiEvaluationId).to.equal('4KBSM000000003F4AQ');
+
+    // the started run should be resolvable from the cache
+    const testCache = await AgentTestCache.create();
+    const cachedRun = testCache.resolveFromCache();
+    expect(cachedRun.aiEvaluationId).to.equal('4KBSM000000003F4AQ');
+    expect(cachedRun.name).to.equal(testName);
+  });
+
+  it('should poll for test run completion when --wait is used', async () => {
+    const testName = 'my_agent_tests';
+    const cli = `agent test run --name ${testName} --target-org ${testSession.hubOrg.username} --wait 5 --json`;
+    const finished = execCmd<AgentTestRunResult>(cli, {
+      env: { ...process.env, SF_MOCK_DIR: mocksPath },
+      ensureExitCode: 0,
+    }).jsonOutput;
+
+    expect(finished?.result.status).to.equal('COMPLETED');
+    expect(finished?.result.aiEvaluationId).to.equal('4KBSM000000003F4AQ');
+
+    // the run completed, so the cache should have no run left to resolve
+    const testCache = await AgentTestCache.create();
+    expect(() => testCache.resolveFromCache()).to.throw('Could not find an aiEvaluationId to resume');
+  });
+});
diff --git a/test/mocks/connect_agent-job-spec.json b/test/mocks/connect_agent-job-spec.json
new file mode 100644
index 0000000..5f32c3b
--- /dev/null
+++ b/test/mocks/connect_agent-job-spec.json
@@ -0,0 +1,90 @@
+{
+  "isSuccess": true,
+  "type": "customer_facing",
+  "role": "replace me",
+  "companyName": "replace me",
+  "companyDescription": "replace me",
+  "companyWebsite": "replace me",
+  "jobSpecs": [
+    {
+      "jobTitle": "Guest_Experience_Enhancement",
+      "jobDescription": "Develop and implement entertainment programs to enhance guest experience."
+    },
+    {
+      "jobTitle": "Event_Planning_and_Execution",
+      "jobDescription": "Plan, organize, and execute resort events and activities."
+    },
+    {
+      "jobTitle": "Vendor_Management",
+      "jobDescription": "Coordinate with external vendors for event supplies and services."
+    },
+    {
+      "jobTitle": "Staff_Training_and_Development",
+      "jobDescription": "Train and develop staff to deliver exceptional entertainment services."
+    },
+    {
+      "jobTitle": "Budget_Management",
+      "jobDescription": "Manage budgets for entertainment activities and events."
+    },
+    {
+      "jobTitle": "Guest_Feedback_Analysis",
+      "jobDescription": "Collect and analyze guest feedback to improve entertainment offerings."
+    },
+    {
+      "jobTitle": "Marketing_Collaboration",
+      "jobDescription": "Work with marketing to promote events and entertainment activities."
+    },
+    {
+      "jobTitle": "Technology_Integration",
+      "jobDescription": "Utilize technology to enhance guest engagement and streamline operations."
+    },
+    {
+      "jobTitle": "Safety_and_Compliance",
+      "jobDescription": "Ensure all entertainment activities comply with safety regulations."
+    },
+    {
+      "jobTitle": "Performance_Monitoring",
+      "jobDescription": "Monitor and evaluate the performance of entertainment programs."
+    },
+    {
+      "jobTitle": "Community_Partnerships",
+      "jobDescription": "Build partnerships with local artists and performers."
+    },
+    {
+      "jobTitle": "Inventory_Management",
+      "jobDescription": "Manage inventory of entertainment equipment and supplies."
+    },
+    {
+      "jobTitle": "Custom_Experience_Creation",
+      "jobDescription": "Design personalized entertainment experiences for VIP guests."
+    },
+    {
+      "jobTitle": "Data_Reporting",
+      "jobDescription": "Generate reports on entertainment program performance and guest satisfaction."
+    },
+    {
+      "jobTitle": "Crisis_Management",
+      "jobDescription": "Develop plans to handle emergencies during entertainment events."
+    },
+    {
+      "jobTitle": "Digital_Engagement",
+      "jobDescription": "Enhance online presence and engagement through social media."
+    },
+    {
+      "jobTitle": "Salesforce_Integration",
+      "jobDescription": "Utilize Salesforce to track guest preferences and tailor entertainment."
+    },
+    {
+      "jobTitle": "Trend_Analysis",
+      "jobDescription": "Stay updated on industry trends to keep entertainment offerings fresh."
+    },
+    {
+      "jobTitle": "Cross_Department_Coordination",
+      "jobDescription": "Collaborate with other departments to ensure seamless guest experience."
+    },
+    {
+      "jobTitle": "Resource_Optimization",
+      "jobDescription": "Optimize the use of resources to maximize guest satisfaction."
+    }
+  ]
+}
diff --git a/test/mocks/einstein_ai-evaluations_runs.json b/test/mocks/einstein_ai-evaluations_runs.json
new file mode 100644
index 0000000..87f063c
--- /dev/null
+++ b/test/mocks/einstein_ai-evaluations_runs.json
@@ -0,0 +1,4 @@
+{
+  "aiEvaluationId": "4KBSM000000003F4AQ",
+  "status": "NEW"
+}
diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/1.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/1.json
new file mode 100644
index 0000000..daf2bbc
--- /dev/null
+++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/1.json
@@ -0,0 +1,4 @@
+{
+  "status": "IN_PROGRESS",
+  "startTime": "2024-11-13T15:00:00.000Z"
+}
diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/2.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/2.json
new file mode 100644
index 0000000..daf2bbc
--- /dev/null
+++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/2.json
@@ -0,0 +1,4 @@
+{
+  "status": "IN_PROGRESS",
+  "startTime": "2024-11-13T15:00:00.000Z"
+}
diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/3.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/3.json
new file mode 100644
index 0000000..d4f6503
--- /dev/null
+++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/3.json
@@ -0,0 +1,4 @@
+{
+  "status": "COMPLETED",
+  "startTime": "2024-11-13T15:00:00.000Z"
+}
diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_cancel.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_cancel.json
new file mode 100644
index 0000000..5550c6d
--- /dev/null
+++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_cancel.json
@@ -0,0 +1,3 @@
+{
+  "success": true
+}
diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json
new file mode 100644
index 0000000..b895af0
--- /dev/null
+++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json
@@ -0,0 +1,82 @@
+{
+  "status": "COMPLETED",
+  "startTime": "2024-11-28T12:00:00Z",
+  "endTime": "2024-11-28T12:05:00Z",
+  "errorMessage": null,
+  "testCases": [
+    {
+      "status": "COMPLETED",
+      "number": 1,
+      "startTime": "2024-11-28T12:00:10Z",
+      "endTime": "2024-11-28T12:00:20Z",
+      "generatedData": {
+        "type": "AGENT",
+        "actionsSequence": ["Action1", "Action2"],
+        "outcome": "Success",
+        "topic": "Mathematics",
+        "inputTokensCount": 50,
+        "outputTokensCount": 55
+      },
+      "expectationResults": [
+        {
+          "name": "topic_sequence_match",
+          "actualValue": "Result A",
+          "expectedValue": "Result A",
+          "score": 1.0,
+          "result": "Passed",
+          "metricLabel": "Accuracy",
+          "metricExplainability": "Measures the correctness of the result.",
+          "status": "Completed",
+          "startTime": "2024-11-28T12:00:12Z",
+          "endTime": "2024-11-28T12:00:13Z",
+          "errorCode": null,
+          "errorMessage": null
+        },
+        {
+          "name": "action_sequence_match",
+          "actualValue": "Result B",
+          "expectedValue": "Result B",
+          "score": 0.9,
+          "result": "Passed",
+          "metricLabel": "Precision",
+          "metricExplainability": "Measures the precision of the result.",
+          "status": "Completed",
+          "startTime": "2024-11-28T12:00:14Z",
+          "endTime": "2024-11-28T12:00:15Z",
+          "errorCode": null,
+          "errorMessage": null
+        }
+      ]
+    },
+    {
+      "status": "ERROR",
+      "number": 2,
+      "startTime": "2024-11-28T12:00:30Z",
+      "endTime": "2024-11-28T12:00:40Z",
+      "generatedData": {
+        "type": "AGENT",
+        "actionsSequence": ["Action3", "Action4"],
+        "outcome": "Failure",
+        "topic": "Physics",
+        "inputTokensCount": 60,
+        "outputTokensCount": 50
+      },
+      "expectationResults": [
+        {
+          "name": "topic_sequence_match",
+          "actualValue": "Result C",
+          "expectedValue": "Result D",
+          "score": 0.5,
+          "result": "Failed",
+          "metricLabel": "Accuracy",
+          "metricExplainability": "Measures the correctness of the result.",
+          "status": "Completed",
+          "startTime": "2024-11-28T12:00:32Z",
+          "endTime": "2024-11-28T12:00:33Z",
+          "errorCode": null,
+          "errorMessage": null
+        }
+      ]
+    }
+  ]
+}
diff --git a/test/nut/agent-test-run.nut.ts b/test/nut/agent-test-run.nut.ts
deleted file mode 100644
index 01a4f84..0000000
--- a/test/nut/agent-test-run.nut.ts
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021, salesforce.com, inc.
- * All rights reserved.
- * Licensed under the BSD 3-Clause license.
- * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
- */
-
-import { execCmd, TestSession } from '@salesforce/cli-plugins-testkit';
-import { expect } from 'chai';
-import { AgentTestRunResult } from '../../src/commands/agent/test/run.js';
-
-let testSession: TestSession;
-
-describe('agent test run NUTs', () => {
-  before('prepare session', async () => {
-    testSession = await TestSession.create({
-      devhubAuthStrategy: 'AUTO',
-      scratchOrgs: [
-        {
-          edition: 'developer',
-          setDefault: true,
-        },
-      ],
-    });
-  });
-
-  after(async () => {
-    await testSession?.clean();
-  });
-
-  it('should return a job ID', () => {
-    const result = execCmd<AgentTestRunResult>('agent test run -i 4KBSM000000003F4AQ --json', { ensureExitCode: 0 })
-      .jsonOutput?.result;
-    expect(result?.success).to.equal(true);
-    expect(result?.jobId).to.be.ok;
-  });
-});
diff --git a/test/unit/agent-test-run.test.ts b/test/unit/agent-test-run.test.ts
deleted file mode 100644
index 0bb1cd1..0000000
--- a/test/unit/agent-test-run.test.ts
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2023, salesforce.com, inc.
- * All rights reserved.
- * Licensed under the BSD 3-Clause license.
- * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
- */
-import { MockTestOrgData, TestContext } from '@salesforce/core/testSetup';
-import { expect } from 'chai';
-import { stubSfCommandUx } from '@salesforce/sf-plugins-core';
-import AgentTestRun from '../../src/commands/agent/test/run.js';
-
-describe('agent run test', () => {
-  const $$ = new TestContext();
-  const testOrg = new MockTestOrgData();
-  let sfCommandStubs: ReturnType<typeof stubSfCommandUx>;
-
-  beforeEach(() => {
-    sfCommandStubs = stubSfCommandUx($$.SANDBOX);
-  });
-
-  afterEach(() => {
-    $$.restore();
-  });
-
-  it('runs agent run test', async () => {
-    await AgentTestRun.run(['-i', 'the-id', '-o', testOrg.username]);
-    const output = sfCommandStubs.log
-      .getCalls()
-      .flatMap((c) => c.args)
-      .join('\n');
-    expect(output).to.include('Starting tests for AiEvalDefinitionVersion:');
-  });
-});
diff --git a/yarn.lock b/yarn.lock
index 20356f0..8b1ee5e 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -1360,15 +1360,23 @@
     strip-ansi "^7.1.0"
     wrap-ansi "^9.0.0"
 
+"@oclif/test@^4.1.0":
+  version "4.1.0"
+  resolved "https://registry.yarnpkg.com/@oclif/test/-/test-4.1.0.tgz#7935e3707cf07480790139e02973196d18d16822"
+  integrity sha512-2ugir6NhRsWJqHM9d2lMEWNiOTD678Jlx5chF/fg6TCAlc7E6E/6+zt+polrCTnTIpih5P/HxOtDekgtjgARwQ==
+  dependencies:
+    ansis "^3.3.2"
+    debug "^4.3.6"
+
 "@pkgjs/parseargs@^0.11.0":
   version "0.11.0"
   resolved "https://registry.yarnpkg.com/@pkgjs/parseargs/-/parseargs-0.11.0.tgz#a77ea742fab25775145434eb1d2328cf5013ac33"
   integrity sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==
 
-"@salesforce/agents@^0.2.4":
-  version "0.2.4"
-  resolved "https://registry.yarnpkg.com/@salesforce/agents/-/agents-0.2.4.tgz#df599a7fd69535f30afa1012d0b89fa90edd42af"
-  integrity sha512-ntJqXhIDG1YsS8qfjEnd4m7CIuZF9VAqkv44FL5Fdslh1vNRwiCA21JJtk1+hZk3EN+JHI/NyZEBYtqckv1fHg==
+"@salesforce/agents@^0.3.0":
+  version "0.3.0"
+  resolved "https://registry.yarnpkg.com/@salesforce/agents/-/agents-0.3.0.tgz#5f58d69eca1dde07daaf88bc2226b1a09e579666"
+  integrity sha512-BV/Fa+WN8IT5n+bsdDI8wga5dxjY9Rhu6eAvU3OCyRQ7F0nFd5uqLe2Ybo+0gLbGCvGCrV9gt8eJ5z4fsgLoDQ==
   dependencies:
     "@oclif/table" "^0.3.3"
     "@salesforce/core" "^8.8.0"
@@ -1392,7 +1400,7 @@
     strip-ansi "6.0.1"
     ts-retry-promise "^0.8.1"
 
-"@salesforce/core@^8.5.1", "@salesforce/core@^8.5.2", "@salesforce/core@^8.5.7", "@salesforce/core@^8.6.2", "@salesforce/core@^8.6.3", "@salesforce/core@^8.8.0":
+"@salesforce/core@^8.5.1", "@salesforce/core@^8.5.7", "@salesforce/core@^8.6.2", "@salesforce/core@^8.6.3", "@salesforce/core@^8.8.0":
   version "8.8.0"
   resolved "https://registry.yarnpkg.com/@salesforce/core/-/core-8.8.0.tgz#849c07ea3a2548ca201fc0fe8baef9b36a462194"
   integrity sha512-HWGdRiy/MPCJ2KHz+W+cnqx0O9xhx9+QYvwP8bn9PE27wj0A/NjTi4xrqIWk1M+fE4dXHycE+8qPf4b540euvg==
@@ -1502,7 +1510,7 @@
     string-width "^7.2.0"
     terminal-link "^3.0.0"
 
-"@salesforce/sf-plugins-core@^12", "@salesforce/sf-plugins-core@^12.1.0":
+"@salesforce/sf-plugins-core@^12.1.0":
   version "12.1.0"
   resolved "https://registry.yarnpkg.com/@salesforce/sf-plugins-core/-/sf-plugins-core-12.1.0.tgz#874531acb39755a634ceda5de6462c3b6256baf6"
   integrity sha512-xJXF0WE+4lq2kb/w24wcZc+76EUCIKv7dj1oATugk9JFzYKySdC1smzCY/BhPGzMQGvXcbkWo5PG5iXDBrtwYQ==
@@ -3215,7 +3223,7 @@ dateformat@^4.6.3:
   resolved "https://registry.yarnpkg.com/dateformat/-/dateformat-4.6.3.tgz#556fa6497e5217fedb78821424f8a1c22fa3f4b5"
   integrity sha512-2P0p0pFGzHS5EMnhdxQi7aJN+iMheud0UhG4dlE1DLAlvL8JHjJJTX/CSm4JXwV0Ka5nGk3zC5mcb5bUQUxxMA==
 
-debug@4, debug@^4.1.0, debug@^4.1.1, debug@^4.3.1, debug@^4.3.2, debug@^4.3.4, debug@^4.3.5, debug@^4.3.7:
+debug@4, debug@^4.1.0, debug@^4.1.1, debug@^4.3.1, debug@^4.3.2, debug@^4.3.4, debug@^4.3.5, debug@^4.3.6, debug@^4.3.7:
   version "4.3.7"
   resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.7.tgz#87945b4151a011d76d95a198d7111c865c360a52"
   integrity sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==
@@ -6767,16 +6775,7 @@ stack-utils@^2.0.6:
   dependencies:
     escape-string-regexp "^2.0.0"
 
-"string-width-cjs@npm:string-width@^4.2.0":
-  version "4.2.3"
-  resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
-  integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
-  dependencies:
-    emoji-regex "^8.0.0"
-    is-fullwidth-code-point "^3.0.0"
-    strip-ansi "^6.0.1"
-
-string-width@^4.0.0, string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3:
+"string-width-cjs@npm:string-width@^4.2.0", string-width@^4.0.0, string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3:
   version "4.2.3"
   resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
   integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
@@ -6844,14 +6843,7 @@ string_decoder@~1.1.1:
   dependencies:
     safe-buffer "~5.1.0"
 
-"strip-ansi-cjs@npm:strip-ansi@^6.0.1":
-  version "6.0.1"
-  resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9"
-  integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==
-  dependencies:
-    ansi-regex "^5.0.1"
-
-strip-ansi@6.0.1, strip-ansi@^6.0.0, strip-ansi@^6.0.1:
+"strip-ansi-cjs@npm:strip-ansi@^6.0.1", strip-ansi@6.0.1, strip-ansi@^6.0.0, strip-ansi@^6.0.1:
   version "6.0.1"
   resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9"
   integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==
@@ -7418,7 +7410,7 @@ workerpool@^6.5.1:
   resolved "https://registry.yarnpkg.com/workerpool/-/workerpool-6.5.1.tgz#060f73b39d0caf97c6db64da004cd01b4c099544"
   integrity sha512-Fs4dNYcsdpYSAfVxhnl1L5zTksjvOJxtC5hzMNl+1t9B8hTJTdKDyZ5ju7ztgPy+ft9tBFXoOlDNiOT9WUXZlA==
 
-"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0":
+"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0:
   version "7.0.0"
   resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
   integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
@@ -7436,15 +7428,6 @@ wrap-ansi@^6.2.0:
     string-width "^4.1.0"
     strip-ansi "^6.0.0"
 
-wrap-ansi@^7.0.0:
-  version "7.0.0"
-  resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
-  integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
-  dependencies:
-    ansi-styles "^4.0.0"
-    string-width "^4.1.0"
-    strip-ansi "^6.0.0"
-
 wrap-ansi@^8.1.0:
   version "8.1.0"
   resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-8.1.0.tgz#56dc22368ee570face1b49819975d9b9a5ead214"