Skip to content

Commit

Permalink
Merge pull request #6 from forcedotcom/mdonnalley/agent-tests
Browse files Browse the repository at this point in the history
feat: mock agent tests
  • Loading branch information
mdonnalley authored Dec 2, 2024
2 parents 129ff9c + 8d346c9 commit e7f7698
Show file tree
Hide file tree
Showing 18 changed files with 1,331 additions and 196 deletions.
41 changes: 9 additions & 32 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,69 +1,46 @@
## [0.1.6](https://github.com/forcedotcom/agents/compare/0.1.5...0.1.6) (2024-11-16)


### Bug Fixes

* **deps:** bump cross-spawn from 7.0.3 to 7.0.5 ([7f43cc7](https://github.com/forcedotcom/agents/commit/7f43cc706b848fd54c88d04bee2c0b7b632d7e76))


- **deps:** bump cross-spawn from 7.0.3 to 7.0.5 ([7f43cc7](https://github.com/forcedotcom/agents/commit/7f43cc706b848fd54c88d04bee2c0b7b632d7e76))

## [0.1.5](https://github.com/forcedotcom/agents/compare/0.1.4...0.1.5) (2024-11-16)


### Bug Fixes

* **deps:** bump @salesforce/core from 8.6.3 to 8.8.0 ([193237b](https://github.com/forcedotcom/agents/commit/193237b5dbbe7ce1ee596a3b7305b5602d0883f8))


- **deps:** bump @salesforce/core from 8.6.3 to 8.8.0 ([193237b](https://github.com/forcedotcom/agents/commit/193237b5dbbe7ce1ee596a3b7305b5602d0883f8))

## [0.1.4](https://github.com/forcedotcom/agents/compare/0.1.3...0.1.4) (2024-11-12)


### Bug Fixes

* do not append spec in name ([284d5d5](https://github.com/forcedotcom/agents/commit/284d5d56ed99c67b93a65904a00fdb00a2552a0e))


- do not append spec in name ([284d5d5](https://github.com/forcedotcom/agents/commit/284d5d56ed99c67b93a65904a00fdb00a2552a0e))

## [0.1.3](https://github.com/forcedotcom/agents/compare/0.1.2...0.1.3) (2024-11-12)


### Bug Fixes

* use latest ([92ecbba](https://github.com/forcedotcom/agents/commit/92ecbbabc403fe57bf4069f9928b029d23db7a16))


- use latest ([92ecbba](https://github.com/forcedotcom/agents/commit/92ecbbabc403fe57bf4069f9928b029d23db7a16))

## [0.1.2](https://github.com/forcedotcom/agents/compare/0.1.1...0.1.2) (2024-11-12)


### Bug Fixes

* publish to preview ([3f5ccb6](https://github.com/forcedotcom/agents/commit/3f5ccb687017186eb29b8b18c7fdce33daee1f70))


- publish to preview ([3f5ccb6](https://github.com/forcedotcom/agents/commit/3f5ccb687017186eb29b8b18c7fdce33daee1f70))

## [0.1.1](https://github.com/forcedotcom/agents/compare/0.1.0...0.1.1) (2024-11-10)


### Bug Fixes

* export Agent class ([6c42b63](https://github.com/forcedotcom/agents/commit/6c42b63bbe9a5a5cf6fa0cea8f5649d07aaa6adc))


- export Agent class ([6c42b63](https://github.com/forcedotcom/agents/commit/6c42b63bbe9a5a5cf6fa0cea8f5649d07aaa6adc))

# [0.1.0](https://github.com/forcedotcom/agents/compare/0c5d8d6ab9e9a8470c7192a56350567882a3017b...0.1.0) (2024-11-09)


### Bug Fixes

* improve types and linting ([d5a6cb3](https://github.com/forcedotcom/agents/commit/d5a6cb3348e63d52e10540e99cf509be64a26649))
* revise readme and version ([f690b7f](https://github.com/forcedotcom/agents/commit/f690b7f8a911315f467f00f5f533e22e92c69a9e))

- improve types and linting ([d5a6cb3](https://github.com/forcedotcom/agents/commit/d5a6cb3348e63d52e10540e99cf509be64a26649))
- revise readme and version ([f690b7f](https://github.com/forcedotcom/agents/commit/f690b7f8a911315f467f00f5f533e22e92c69a9e))

### Features

* add initial agent job spec create and mock ([0c5d8d6](https://github.com/forcedotcom/agents/commit/0c5d8d6ab9e9a8470c7192a56350567882a3017b))



- add initial agent job spec create and mock ([0c5d8d6](https://github.com/forcedotcom/agents/commit/0c5d8d6ab9e9a8470c7192a56350567882a3017b))
5 changes: 4 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@
"url": "https://github.com/forcedotcom/agents.git"
},
"dependencies": {
"@oclif/table": "^0.3.3",
"@salesforce/core": "^8.8.0",
"@salesforce/kit": "^3.2.3"
"@salesforce/kit": "^3.2.3",
"@salesforce/sf-plugins-core": "^12.1.0",
"nock": "^13.5.6"
},
"devDependencies": {
"@salesforce/cli-plugins-testkit": "^5.3.38",
Expand Down
56 changes: 10 additions & 46 deletions src/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,10 @@
* For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
*/

import { join } from 'node:path';
import { readFileSync, statSync } from 'node:fs';
import { inspect } from 'node:util';
import { Connection, Logger, SfError, SfProject } from '@salesforce/core';
import { Duration, sleep } from '@salesforce/kit';
import { getMockDir } from './mockDir';
import { MaybeMock } from './maybe-mock';
import {
type SfAgent,
type AgentCreateConfig,
Expand All @@ -22,11 +20,11 @@ import {

export class Agent implements SfAgent {
private logger: Logger;
private mockDir?: string;
private maybeMock: MaybeMock;

public constructor(private connection: Connection, private project: SfProject) {
public constructor(connection: Connection, private project: SfProject) {
this.logger = Logger.childFromRoot(this.constructor.name);
this.mockDir = getMockDir();
this.maybeMock = new MaybeMock(connection);
}

public async create(config: AgentCreateConfig): Promise<AgentCreateResponse> {
Expand All @@ -50,48 +48,14 @@ export class Agent implements SfAgent {
this.verifyAgentSpecConfig(config);

let agentSpec: AgentJobSpec;

if (this.mockDir) {
const specFileName = `${config.name}.json`;
const specFilePath = join(this.mockDir, `${specFileName}`);
try {
this.logger.debug(`Using mock directory: ${this.mockDir} for agent job spec creation`);
statSync(specFilePath);
} catch (err) {
throw SfError.create({
name: 'MissingMockFile',
message: `SF_MOCK_DIR [${this.mockDir}] must contain a spec file with name ${specFileName}`,
cause: err,
});
}
try {
this.logger.debug(`Returning mock agent spec file: ${specFilePath}`);
agentSpec = JSON.parse(readFileSync(specFilePath, 'utf8')) as AgentJobSpec;
} catch (err) {
throw SfError.create({
name: 'InvalidMockFile',
message: `SF_MOCK_DIR [${this.mockDir}] must contain a valid spec file with name ${specFileName}`,
cause: err,
actions: [
'Check that the file is readable',
'Check that the file is a valid JSON array of jobTitle and jobDescription objects',
],
});
}
const response = await this.maybeMock.request<AgentJobSpecCreateResponse>('GET', this.buildAgentJobSpecUrl(config));
if (response.isSuccess && response.jobSpecs) {
agentSpec = response.jobSpecs;
} else {
// TODO: We'll probably want to wrap this for better error handling but let's see
// what it looks like first.
const response = await this.connection.requestGet<AgentJobSpecCreateResponse>(this.buildAgentJobSpecUrl(config), {
retry: { maxRetries: 3 },
throw SfError.create({
name: 'AgentJobSpecCreateError',
message: response.errorMessage ?? 'unknown',
});
if (response.isSuccess) {
agentSpec = response?.jobSpecs as AgentJobSpec;
} else {
throw SfError.create({
name: 'AgentJobSpecCreateError',
message: response.errorMessage ?? 'unknown',
});
}
}

return agentSpec;
Expand Down
190 changes: 190 additions & 0 deletions src/agentTester.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
/*
* Copyright (c) 2024, salesforce.com, inc.
* All rights reserved.
* Licensed under the BSD 3-Clause license.
* For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
*/
import { Connection, Lifecycle, PollingClient, StatusResult } from '@salesforce/core';
import { Duration } from '@salesforce/kit';
import { MaybeMock } from './maybe-mock';

/** Output style for the `formatted` string returned by `details()`/`poll()`: `'human'` selects `humanFormat`, anything else `jsonFormat`. */
type Format = 'human' | 'json';

/** Status values shared by the run-level, test-case-level, and expectation-level response types below. */
type TestStatus = 'NEW' | 'IN_PROGRESS' | 'COMPLETED' | 'ERROR';

/** Response type requested by `AgentTester.start()` (POST `/einstein/ai-evaluations/runs`). */
type AgentTestStartResponse = {
// Identifier of the started run; presumably the `jobId` the other methods expect — TODO confirm against the API.
aiEvaluationId: string;
status: TestStatus;
};

/** Response type requested by `AgentTester.status()` (GET `/einstein/ai-evaluations/runs/{jobId}`). */
type AgentTestStatusResponse = {
status: TestStatus;
startTime: string;
// Optional: may be absent from the response.
endTime?: string;
errorMessage?: string;
};

/**
 * One entry of `AgentTestDetailsResponse.testCases`.
 *
 * `humanFormat` renders one table per test case, with one row per entry of `expectationResults`.
 */
type TestCaseResult = {
status: TestStatus;
// Typed as a string; used verbatim in the table title `Test Case #<number>`.
number: string;
startTime: string;
endTime?: string;
generatedData: {
// Only the 'AGENT' literal is modeled here.
type: 'AGENT';
actionsSequence: string[];
outcome: 'Success' | 'Failure';
topic: string;
// Token counts are typed as strings, not numbers.
inputTokensCount: string;
outputTokensCount: string;
};
expectationResults: Array<{
name: string;
actualValue: string;
expectedValue: string;
score: number;
// `poll()` counts a test case as passing only when every expectation has result 'Passed'.
result: 'Passed' | 'Failed';
metricLabel: 'Accuracy' | 'Precision';
metricExplainability: string;
status: TestStatus;
// Parsed with `new Date(...)` by `humanFormat` to compute the runtime column.
startTime: string;
// When absent, `humanFormat` reports a runtime of 0.
endTime?: string;
errorCode?: string;
// Shown in the `message` column by `humanFormat` (empty string when absent).
errorMessage?: string;
}>;
};

/**
 * Response type requested by `AgentTester.details()` (GET `/einstein/ai-evaluations/runs/{jobId}/details`).
 *
 * Carries the same run-level fields as `AgentTestStatusResponse` plus the per-test-case results.
 */
type AgentTestDetailsResponse = {
status: TestStatus;
startTime: string;
endTime?: string;
errorMessage?: string;
testCases: TestCaseResult[];
};

/**
 * Client for the AI evaluation run endpoints under `/einstein/ai-evaluations/runs`.
 *
 * Every request is issued through a `MaybeMock` built from the supplied connection.
 */
export class AgentTester {
  private maybeMock: MaybeMock;

  public constructor(connection: Connection) {
    this.maybeMock = new MaybeMock(connection);
  }

  /**
   * Starts an AI evaluation run based on the provided name or ID.
   *
   * @param nameOrId - The name or ID of the AI evaluation definition.
   * @param type - Specifies whether the provided identifier is a 'name' or 'id'. Defaults to 'name'. With 'name', nameOrId is sent as `aiEvaluationDefinitionName`; with 'id', it is sent as `aiEvaluationDefinitionVersionId`.
   * @returns A promise that resolves to an object containing the ID of the started AI evaluation run.
   */
  public async start(nameOrId: string, type: 'name' | 'id' = 'name'): Promise<{ aiEvaluationId: string }> {
    const identifierKey = type === 'name' ? 'aiEvaluationDefinitionName' : 'aiEvaluationDefinitionVersionId';
    const requestBody = { [identifierKey]: nameOrId };

    return this.maybeMock.request<AgentTestStartResponse>('POST', '/einstein/ai-evaluations/runs', requestBody);
  }

  /**
   * Fetches the run-level status of an evaluation run.
   *
   * @param jobId - Identifier of the run, interpolated into the request URL.
   * @returns The status response for the run.
   */
  public async status(jobId: string): Promise<AgentTestStatusResponse> {
    return this.maybeMock.request<AgentTestStatusResponse>('GET', `/einstein/ai-evaluations/runs/${jobId}`);
  }

  /**
   * Polls a run once per second until its status is `completed` (compared case-insensitively)
   * or the timeout elapses.
   *
   * On every tick an `AGENT_TEST_POLLING_EVENT` lifecycle event is emitted with the job id, the
   * run status, and the total / failing / passing test case counts. A test case counts as failing
   * when its status is `ERROR`, and as passing when its status is `COMPLETED` and every
   * expectation result is `Passed`.
   *
   * NOTE: only a `completed` run status ends polling; any other status (including `ERROR`) keeps
   * polling until the timeout.
   *
   * @param jobId - Identifier of the run to poll.
   * @param options - `format` selects how the final details are formatted (default 'human');
   * `timeout` bounds the polling (default 5 minutes).
   * @returns The details response and its formatted representation, fetched after completion.
   */
  public async poll(
    jobId: string,
    {
      format = 'human',
      timeout = Duration.minutes(5),
    }: {
      format?: Format;
      timeout?: Duration;
    } = {
      format: 'human',
      timeout: Duration.minutes(5),
    }
  ): Promise<{ response: AgentTestDetailsResponse; formatted: string }> {
    const lifecycle = Lifecycle.getInstance();

    const pollOnce = async (): Promise<StatusResult> => {
      // NOTE: the status call is not strictly needed since the details API carries the same
      // information; both are requested in parallel on every tick.
      const [detailsResult, runStatus] = await Promise.all([this.details(jobId, format), this.status(jobId)]);
      const { testCases } = detailsResult.response;

      const errored = testCases.filter((testCase) => testCase.status === 'ERROR');
      const passed = testCases.filter(
        (testCase) =>
          testCase.status === 'COMPLETED' &&
          testCase.expectationResults.every((expectation) => expectation.result === 'Passed')
      );
      const isCompleted = runStatus.status.toLowerCase() === 'completed';

      // The same event payload is emitted whether or not the run has finished.
      await lifecycle.emit('AGENT_TEST_POLLING_EVENT', {
        jobId,
        status: runStatus.status,
        totalTestCases: testCases.length,
        failingTestCases: errored.length,
        passingTestCases: passed.length,
      });

      if (!isCompleted) {
        return { completed: false };
      }

      // Details are fetched again after completion rather than reusing the parallel result.
      return { payload: await this.details(jobId, format), completed: true };
    };

    const client = await PollingClient.create({
      poll: pollOnce,
      frequency: Duration.seconds(1),
      timeout,
    });

    return client.subscribe<{ response: AgentTestDetailsResponse; formatted: string }>();
  }

  /**
   * Fetches the detailed results of a run and formats them.
   *
   * @param jobId - Identifier of the run.
   * @param format - 'human' formats with `humanFormat`; any other value formats with `jsonFormat`. Defaults to 'human'.
   * @returns The raw details response together with its formatted string.
   */
  public async details(
    jobId: string,
    format: Format = 'human'
  ): Promise<{ response: AgentTestDetailsResponse; formatted: string }> {
    const response = await this.maybeMock.request<AgentTestDetailsResponse>(
      'GET',
      `/einstein/ai-evaluations/runs/${jobId}/details`
    );

    let formatted: string;
    if (format === 'human') {
      formatted = await humanFormat(jobId, response);
    } else {
      formatted = await jsonFormat(response);
    }

    return { response, formatted };
  }

  /**
   * Requests cancellation of a run.
   *
   * @param jobId - Identifier of the run to cancel.
   * @returns The cancel endpoint's response.
   */
  public async cancel(jobId: string): Promise<{ success: boolean }> {
    return this.maybeMock.request<{ success: boolean }>('POST', `/einstein/ai-evaluations/runs/${jobId}/cancel`);
  }
}

/**
 * Renders test details as one table per test case, joined with newlines.
 *
 * Each table is titled `Test Case #<number>` and has one row per expectation result.
 * `Ux` is loaded lazily from `@salesforce/sf-plugins-core` on each call.
 *
 * @param name - Accepted for interface compatibility; not used in the rendered output.
 * @param details - The details response whose test cases are rendered.
 * @returns The rendered tables, or an empty string when there are no test cases.
 */
export async function humanFormat(name: string, details: AgentTestDetailsResponse): Promise<string> {
  const { Ux } = await import('@salesforce/sf-plugins-core');
  const ux = new Ux();

  // Elapsed milliseconds between two timestamps; 0 when the end time is missing.
  const runtimeMs = (startTime: string, endTime?: string): number => {
    if (!endTime) {
      return 0;
    }
    return new Date(endTime).getTime() - new Date(startTime).getTime();
  };

  return details.testCases
    .map((testCase) =>
      ux.makeTable({
        title: `Test Case #${testCase.number}`,
        data: testCase.expectationResults.map((expectation) => ({
          name: expectation.name,
          outcome: expectation.result === 'Passed' ? 'Pass' : 'Fail',
          actualValue: expectation.actualValue,
          expectedValue: expectation.expectedValue,
          score: expectation.score,
          'metric label': expectation.metricLabel,
          message: expectation.errorMessage ?? '',
          'runtime (MS)': runtimeMs(expectation.startTime, expectation.endTime),
        })),
      })
    )
    .join('\n');
}

/**
 * Serializes test details as pretty-printed JSON with two-space indentation.
 *
 * @param details - The details response to serialize.
 * @returns The JSON string.
 */
export async function jsonFormat(details: AgentTestDetailsResponse): Promise<string> {
  const indentWidth = 2;
  return JSON.stringify(details, null, indentWidth);
}
1 change: 1 addition & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ export {
SfAgent,
} from './types';
export { Agent } from './agent';
export { AgentTester } from './agentTester';
Loading

0 comments on commit e7f7698

Please sign in to comment.