Merge pull request #6 from forcedotcom/mdonnalley/agent-tests

feat: mock agent tests
forcedotcom · Dec 2, 2024 · e7f7698 · e7f7698
2 parents 129ff9c + 8d346c9
commit e7f7698
Show file tree

Hide file tree

Showing 18 changed files with 1,331 additions and 196 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,69 +1,46 @@
 ## [0.1.6](https://github.com/forcedotcom/agents/compare/0.1.5...0.1.6) (2024-11-16)
 
-
 ### Bug Fixes
 
-* **deps:** bump cross-spawn from 7.0.3 to 7.0.5 ([7f43cc7](https://github.com/forcedotcom/agents/commit/7f43cc706b848fd54c88d04bee2c0b7b632d7e76))
-
-
+- **deps:** bump cross-spawn from 7.0.3 to 7.0.5 ([7f43cc7](https://github.com/forcedotcom/agents/commit/7f43cc706b848fd54c88d04bee2c0b7b632d7e76))
 
 ## [0.1.5](https://github.com/forcedotcom/agents/compare/0.1.4...0.1.5) (2024-11-16)
 
-
 ### Bug Fixes
 
-* **deps:** bump @salesforce/core from 8.6.3 to 8.8.0 ([193237b](https://github.com/forcedotcom/agents/commit/193237b5dbbe7ce1ee596a3b7305b5602d0883f8))
-
-
+- **deps:** bump @salesforce/core from 8.6.3 to 8.8.0 ([193237b](https://github.com/forcedotcom/agents/commit/193237b5dbbe7ce1ee596a3b7305b5602d0883f8))
 
 ## [0.1.4](https://github.com/forcedotcom/agents/compare/0.1.3...0.1.4) (2024-11-12)
 
-
 ### Bug Fixes
 
-* do not append spec in name ([284d5d5](https://github.com/forcedotcom/agents/commit/284d5d56ed99c67b93a65904a00fdb00a2552a0e))
-
-
+- do not append spec in name ([284d5d5](https://github.com/forcedotcom/agents/commit/284d5d56ed99c67b93a65904a00fdb00a2552a0e))
 
 ## [0.1.3](https://github.com/forcedotcom/agents/compare/0.1.2...0.1.3) (2024-11-12)
 
-
 ### Bug Fixes
 
-* use latest ([92ecbba](https://github.com/forcedotcom/agents/commit/92ecbbabc403fe57bf4069f9928b029d23db7a16))
-
-
+- use latest ([92ecbba](https://github.com/forcedotcom/agents/commit/92ecbbabc403fe57bf4069f9928b029d23db7a16))
 
 ## [0.1.2](https://github.com/forcedotcom/agents/compare/0.1.1...0.1.2) (2024-11-12)
 
-
 ### Bug Fixes
 
-* publish to preview ([3f5ccb6](https://github.com/forcedotcom/agents/commit/3f5ccb687017186eb29b8b18c7fdce33daee1f70))
-
-
+- publish to preview ([3f5ccb6](https://github.com/forcedotcom/agents/commit/3f5ccb687017186eb29b8b18c7fdce33daee1f70))
 
 ## [0.1.1](https://github.com/forcedotcom/agents/compare/0.1.0...0.1.1) (2024-11-10)
 
-
 ### Bug Fixes
 
-* export Agent class ([6c42b63](https://github.com/forcedotcom/agents/commit/6c42b63bbe9a5a5cf6fa0cea8f5649d07aaa6adc))
-
-
+- export Agent class ([6c42b63](https://github.com/forcedotcom/agents/commit/6c42b63bbe9a5a5cf6fa0cea8f5649d07aaa6adc))
 
 # [0.1.0](https://github.com/forcedotcom/agents/compare/0c5d8d6ab9e9a8470c7192a56350567882a3017b...0.1.0) (2024-11-09)
 
-
 ### Bug Fixes
 
-* improve types and linting ([d5a6cb3](https://github.com/forcedotcom/agents/commit/d5a6cb3348e63d52e10540e99cf509be64a26649))
-* revise readme and version ([f690b7f](https://github.com/forcedotcom/agents/commit/f690b7f8a911315f467f00f5f533e22e92c69a9e))
-
+- improve types and linting ([d5a6cb3](https://github.com/forcedotcom/agents/commit/d5a6cb3348e63d52e10540e99cf509be64a26649))
+- revise readme and version ([f690b7f](https://github.com/forcedotcom/agents/commit/f690b7f8a911315f467f00f5f533e22e92c69a9e))
 
 ### Features
 
-* add initial agent job spec create and mock ([0c5d8d6](https://github.com/forcedotcom/agents/commit/0c5d8d6ab9e9a8470c7192a56350567882a3017b))
-
-
-
+- add initial agent job spec create and mock ([0c5d8d6](https://github.com/forcedotcom/agents/commit/0c5d8d6ab9e9a8470c7192a56350567882a3017b))
diff --git a/package.json b/package.json
@@ -11,8 +11,11 @@
     "url": "https://github.com/forcedotcom/agents.git"
   },
   "dependencies": {
+    "@oclif/table": "^0.3.3",
     "@salesforce/core": "^8.8.0",
-    "@salesforce/kit": "^3.2.3"
+    "@salesforce/kit": "^3.2.3",
+    "@salesforce/sf-plugins-core": "^12.1.0",
+    "nock": "^13.5.6"
   },
   "devDependencies": {
     "@salesforce/cli-plugins-testkit": "^5.3.38",

diff --git a/src/agent.ts b/src/agent.ts
@@ -5,12 +5,10 @@
  * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
  */
 
-import { join } from 'node:path';
-import { readFileSync, statSync } from 'node:fs';
 import { inspect } from 'node:util';
 import { Connection, Logger, SfError, SfProject } from '@salesforce/core';
 import { Duration, sleep } from '@salesforce/kit';
-import { getMockDir } from './mockDir';
+import { MaybeMock } from './maybe-mock';
 import {
   type SfAgent,
   type AgentCreateConfig,
@@ -22,11 +20,11 @@ import {
 
 export class Agent implements SfAgent {
   private logger: Logger;
-  private mockDir?: string;
+  private maybeMock: MaybeMock;
 
-  public constructor(private connection: Connection, private project: SfProject) {
+  public constructor(connection: Connection, private project: SfProject) {
     this.logger = Logger.childFromRoot(this.constructor.name);
-    this.mockDir = getMockDir();
+    this.maybeMock = new MaybeMock(connection);
   }
 
   public async create(config: AgentCreateConfig): Promise<AgentCreateResponse> {
@@ -50,48 +48,14 @@ export class Agent implements SfAgent {
     this.verifyAgentSpecConfig(config);
 
     let agentSpec: AgentJobSpec;
-
-    if (this.mockDir) {
-      const specFileName = `${config.name}.json`;
-      const specFilePath = join(this.mockDir, `${specFileName}`);
-      try {
-        this.logger.debug(`Using mock directory: ${this.mockDir} for agent job spec creation`);
-        statSync(specFilePath);
-      } catch (err) {
-        throw SfError.create({
-          name: 'MissingMockFile',
-          message: `SF_MOCK_DIR [${this.mockDir}] must contain a spec file with name ${specFileName}`,
-          cause: err,
-        });
-      }
-      try {
-        this.logger.debug(`Returning mock agent spec file: ${specFilePath}`);
-        agentSpec = JSON.parse(readFileSync(specFilePath, 'utf8')) as AgentJobSpec;
-      } catch (err) {
-        throw SfError.create({
-          name: 'InvalidMockFile',
-          message: `SF_MOCK_DIR [${this.mockDir}] must contain a valid spec file with name ${specFileName}`,
-          cause: err,
-          actions: [
-            'Check that the file is readable',
-            'Check that the file is a valid JSON array of jobTitle and jobDescription objects',
-          ],
-        });
-      }
+    const response = await this.maybeMock.request<AgentJobSpecCreateResponse>('GET', this.buildAgentJobSpecUrl(config));
+    if (response.isSuccess && response.jobSpecs) {
+      agentSpec = response.jobSpecs;
     } else {
-      // TODO: We'll probably want to wrap this for better error handling but let's see
-      //       what it looks like first.
-      const response = await this.connection.requestGet<AgentJobSpecCreateResponse>(this.buildAgentJobSpecUrl(config), {
-        retry: { maxRetries: 3 },
+      throw SfError.create({
+        name: 'AgentJobSpecCreateError',
+        message: response.errorMessage ?? 'unknown',
       });
-      if (response.isSuccess) {
-        agentSpec = response?.jobSpecs as AgentJobSpec;
-      } else {
-        throw SfError.create({
-          name: 'AgentJobSpecCreateError',
-          message: response.errorMessage ?? 'unknown',
-        });
-      }
     }
 
     return agentSpec;

diff --git a/src/agentTester.ts b/src/agentTester.ts
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2024, salesforce.com, inc.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+import { Connection, Lifecycle, PollingClient, StatusResult } from '@salesforce/core';
+import { Duration } from '@salesforce/kit';
+import { MaybeMock } from './maybe-mock';
+
+type Format = 'human' | 'json'; // Output format for the `formatted` string: 'human' is rendered by humanFormat, 'json' by jsonFormat.
+
+type TestStatus = 'NEW' | 'IN_PROGRESS' | 'COMPLETED' | 'ERROR'; // Status values shared by runs, test cases and expectation results.
+
+type AgentTestStartResponse = { // Response type requested by AgentTester.start (POST /einstein/ai-evaluations/runs).
+  aiEvaluationId: string;
+  status: TestStatus;
+};
+
+type AgentTestStatusResponse = { // Response type requested by AgentTester.status (GET /einstein/ai-evaluations/runs/{jobId}).
+  status: TestStatus;
+  startTime: string;
+  endTime?: string; // NOTE(review): presumably absent until the run has finished — confirm against the API.
+  errorMessage?: string;
+};
+
+type TestCaseResult = { // One entry of AgentTestDetailsResponse.testCases.
+  status: TestStatus;
+  number: string; // Shown by humanFormat in the table title as "Test Case #<number>".
+  startTime: string;
+  endTime?: string;
+  generatedData: {
+    type: 'AGENT';
+    actionsSequence: string[];
+    outcome: 'Success' | 'Failure';
+    topic: string;
+    inputTokensCount: string;
+    outputTokensCount: string;
+  };
+  expectationResults: Array<{ // humanFormat renders one table row per entry.
+    name: string;
+    actualValue: string;
+    expectedValue: string;
+    score: number;
+    result: 'Passed' | 'Failed'; // AgentTester.poll counts a test case as passing only when every entry is 'Passed'.
+    metricLabel: 'Accuracy' | 'Precision';
+    metricExplainability: string;
+    status: TestStatus;
+    startTime: string; // Parsed with `new Date(...)` by humanFormat to compute the runtime column.
+    endTime?: string; // When missing, humanFormat reports a runtime of 0.
+    errorCode?: string;
+    errorMessage?: string;
+  }>;
+};
+
+type AgentTestDetailsResponse = { // Response type requested by AgentTester.details (GET /einstein/ai-evaluations/runs/{jobId}/details).
+  status: TestStatus;
+  startTime: string;
+  endTime?: string;
+  errorMessage?: string;
+  testCases: TestCaseResult[];
+};
+
+/**
+ * Client for starting, polling, inspecting and cancelling agent test (AI evaluation) runs.
+ *
+ * Every request is issued through a `MaybeMock` built from the supplied connection.
+ */
+export class AgentTester {
+  private maybeMock: MaybeMock;
+  public constructor(connection: Connection) {
+    this.maybeMock = new MaybeMock(connection);
+  }
+
+  /**
+   * Starts an AI evaluation run based on the provided name or ID.
+   *
+   * @param nameOrId - The name or ID of the AI evaluation definition.
+   * @param type - Specifies whether the provided identifier is a 'name' or 'id'. Defaults to 'name'. If 'name' is provided, nameOrId is treated as the name of the AiEvaluationDefinition. If 'id' is provided, nameOrId is treated as the unique ID of the AiEvaluationDefinition.
+   * @returns A promise that resolves to an object containing the ID of the started AI evaluation run.
+   */
+  public async start(nameOrId: string, type: 'name' | 'id' = 'name'): Promise<{ aiEvaluationId: string }> {
+    const url = '/einstein/ai-evaluations/runs';
+
+    return this.maybeMock.request<AgentTestStartResponse>('POST', url, {
+      [type === 'name' ? 'aiEvaluationDefinitionName' : 'aiEvaluationDefinitionVersionId']: nameOrId,
+    });
+  }
+
+  /**
+   * Fetches the current status of an AI evaluation run.
+   *
+   * @param jobId - The ID of the AI evaluation run.
+   * @returns A promise that resolves to the status response of the run.
+   */
+  public async status(jobId: string): Promise<AgentTestStatusResponse> {
+    const url = `/einstein/ai-evaluations/runs/${jobId}`;
+
+    return this.maybeMock.request<AgentTestStatusResponse>('GET', url);
+  }
+
+  /**
+   * Polls an AI evaluation run once per second until it reaches a terminal status
+   * ('COMPLETED' or 'ERROR') or the timeout elapses.
+   *
+   * An 'AGENT_TEST_POLLING_EVENT' lifecycle event is emitted on every poll with the
+   * run status and the total/passing/failing test case counts.
+   *
+   * @param jobId - The ID of the AI evaluation run.
+   * @param options - Optional settings: `format` selects how `formatted` is rendered
+   * (defaults to 'human'); `timeout` is handed to the PollingClient (defaults to 5 minutes).
+   * @returns A promise that resolves to the run details and their formatted rendering.
+   * For a run that ended in 'ERROR' the details are still returned; inspect
+   * `response.status` and `response.errorMessage`.
+   */
+  public async poll(
+    jobId: string,
+    {
+      format = 'human',
+      timeout = Duration.minutes(5),
+    }: {
+      format?: Format;
+      timeout?: Duration;
+    } = {}
+  ): Promise<{ response: AgentTestDetailsResponse; formatted: string }> {
+    const lifecycle = Lifecycle.getInstance();
+    const client = await PollingClient.create({
+      poll: async (): Promise<StatusResult> => {
+        // NOTE: we don't actually need to call the status API here since all the same information is present on the
+        // details API. We could just call the details API and check the status there.
+        const [detailsResponse, statusResponse] = await Promise.all([this.details(jobId, format), this.status(jobId)]);
+        const { testCases } = detailsResponse.response;
+        const allExpectationsPassed = (tc: TestCaseResult): boolean =>
+          tc.expectationResults.every((r) => r.result === 'Passed');
+
+        const totalTestCases = testCases.length;
+        const passingTestCases = testCases.filter((tc) => tc.status === 'COMPLETED' && allExpectationsPassed(tc)).length;
+        // A test case fails when it errored out OR when it completed with at least one expectation that did not pass;
+        // counting only 'ERROR' would leave completed-but-failed cases out of both tallies.
+        const failingTestCases = testCases.filter(
+          (tc) => tc.status === 'ERROR' || (tc.status === 'COMPLETED' && !allExpectationsPassed(tc))
+        ).length;
+
+        await lifecycle.emit('AGENT_TEST_POLLING_EVENT', {
+          jobId,
+          status: statusResponse.status,
+          totalTestCases,
+          failingTestCases,
+          passingTestCases,
+        });
+
+        // 'ERROR' is terminal as well: without this check an errored run would be polled until the timeout.
+        const runStatus = statusResponse.status.toLowerCase();
+        if (runStatus === 'completed' || runStatus === 'error') {
+          // Fetch the details again: the snapshot above was requested concurrently with the status call,
+          // so it may have been taken before the run reached its terminal status.
+          return { payload: await this.details(jobId, format), completed: true };
+        }
+
+        return { completed: false };
+      },
+      frequency: Duration.seconds(1),
+      timeout,
+    });
+
+    return client.subscribe<{ response: AgentTestDetailsResponse; formatted: string }>();
+  }
+
+  /**
+   * Fetches the detailed results of an AI evaluation run and renders them.
+   *
+   * @param jobId - The ID of the AI evaluation run.
+   * @param format - 'human' renders tables via humanFormat; 'json' renders via jsonFormat. Defaults to 'human'.
+   * @returns A promise that resolves to the raw details response and its formatted rendering.
+   */
+  public async details(
+    jobId: string,
+    format: Format = 'human'
+  ): Promise<{ response: AgentTestDetailsResponse; formatted: string }> {
+    const url = `/einstein/ai-evaluations/runs/${jobId}/details`;
+
+    const response = await this.maybeMock.request<AgentTestDetailsResponse>('GET', url);
+    return {
+      response,
+      formatted: format === 'human' ? await humanFormat(jobId, response) : await jsonFormat(response),
+    };
+  }
+
+  /**
+   * Requests cancellation of an AI evaluation run.
+   *
+   * @param jobId - The ID of the AI evaluation run.
+   * @returns A promise that resolves to the cancel response.
+   */
+  public async cancel(jobId: string): Promise<{ success: boolean }> {
+    const url = `/einstein/ai-evaluations/runs/${jobId}/cancel`;
+
+    return this.maybeMock.request<{ success: boolean }>('POST', url);
+  }
+}
+
+/**
+ * Renders the details of a test run as human-readable tables, one table per test case,
+ * joined with newlines.
+ *
+ * @param name - Identifier of the run; currently not used in the rendered output.
+ * @param details - The details response whose test cases are rendered.
+ * @returns A promise that resolves to the concatenated tables.
+ */
+export async function humanFormat(name: string, details: AgentTestDetailsResponse): Promise<string> {
+  // Loaded lazily so the dependency is only pulled in when human output is requested.
+  const { Ux } = await import('@salesforce/sf-plugins-core');
+  const ux = new Ux();
+
+  // Elapsed wall-clock time in milliseconds; 0 when no end time is available.
+  const elapsedMs = (startTime: string, endTime?: string): number =>
+    endTime ? new Date(endTime).getTime() - new Date(startTime).getTime() : 0;
+
+  return details.testCases
+    .map((testCase) =>
+      ux.makeTable({
+        title: `Test Case #${testCase.number}`,
+        data: testCase.expectationResults.map((expectation) => ({
+          name: expectation.name,
+          outcome: expectation.result === 'Passed' ? 'Pass' : 'Fail',
+          actualValue: expectation.actualValue,
+          expectedValue: expectation.expectedValue,
+          score: expectation.score,
+          'metric label': expectation.metricLabel,
+          message: expectation.errorMessage ?? '',
+          'runtime (MS)': elapsedMs(expectation.startTime, expectation.endTime),
+        })),
+      })
+    )
+    .join('\n');
+}
+
+/**
+ * Serializes the details of a test run as pretty-printed JSON (2-space indentation).
+ *
+ * @param details - The details response to serialize.
+ * @returns A promise that resolves to the JSON string.
+ */
+export async function jsonFormat(details: AgentTestDetailsResponse): Promise<string> {
+  const serialized = JSON.stringify(details, null, 2);
+  return Promise.resolve(serialized);
+}
diff --git a/src/index.ts b/src/index.ts
@@ -14,3 +14,4 @@ export {
   SfAgent,
 } from './types';
 export { Agent } from './agent';
+export { AgentTester } from './agentTester';