-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #6 from forcedotcom/mdonnalley/agent-tests
feat: mock agent tests
- Loading branch information
Showing
18 changed files
with
1,331 additions
and
196 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,69 +1,46 @@ | ||
## [0.1.6](https://github.com/forcedotcom/agents/compare/0.1.5...0.1.6) (2024-11-16) | ||
|
||
|
||
### Bug Fixes | ||
|
||
* **deps:** bump cross-spawn from 7.0.3 to 7.0.5 ([7f43cc7](https://github.com/forcedotcom/agents/commit/7f43cc706b848fd54c88d04bee2c0b7b632d7e76)) | ||
|
||
|
||
- **deps:** bump cross-spawn from 7.0.3 to 7.0.5 ([7f43cc7](https://github.com/forcedotcom/agents/commit/7f43cc706b848fd54c88d04bee2c0b7b632d7e76)) | ||
|
||
## [0.1.5](https://github.com/forcedotcom/agents/compare/0.1.4...0.1.5) (2024-11-16) | ||
|
||
|
||
### Bug Fixes | ||
|
||
* **deps:** bump @salesforce/core from 8.6.3 to 8.8.0 ([193237b](https://github.com/forcedotcom/agents/commit/193237b5dbbe7ce1ee596a3b7305b5602d0883f8)) | ||
|
||
|
||
- **deps:** bump @salesforce/core from 8.6.3 to 8.8.0 ([193237b](https://github.com/forcedotcom/agents/commit/193237b5dbbe7ce1ee596a3b7305b5602d0883f8)) | ||
|
||
## [0.1.4](https://github.com/forcedotcom/agents/compare/0.1.3...0.1.4) (2024-11-12) | ||
|
||
|
||
### Bug Fixes | ||
|
||
* do not append spec in name ([284d5d5](https://github.com/forcedotcom/agents/commit/284d5d56ed99c67b93a65904a00fdb00a2552a0e)) | ||
|
||
|
||
- do not append spec in name ([284d5d5](https://github.com/forcedotcom/agents/commit/284d5d56ed99c67b93a65904a00fdb00a2552a0e)) | ||
|
||
## [0.1.3](https://github.com/forcedotcom/agents/compare/0.1.2...0.1.3) (2024-11-12) | ||
|
||
|
||
### Bug Fixes | ||
|
||
* use latest ([92ecbba](https://github.com/forcedotcom/agents/commit/92ecbbabc403fe57bf4069f9928b029d23db7a16)) | ||
|
||
|
||
- use latest ([92ecbba](https://github.com/forcedotcom/agents/commit/92ecbbabc403fe57bf4069f9928b029d23db7a16)) | ||
|
||
## [0.1.2](https://github.com/forcedotcom/agents/compare/0.1.1...0.1.2) (2024-11-12) | ||
|
||
|
||
### Bug Fixes | ||
|
||
* publish to preview ([3f5ccb6](https://github.com/forcedotcom/agents/commit/3f5ccb687017186eb29b8b18c7fdce33daee1f70)) | ||
|
||
|
||
- publish to preview ([3f5ccb6](https://github.com/forcedotcom/agents/commit/3f5ccb687017186eb29b8b18c7fdce33daee1f70)) | ||
|
||
## [0.1.1](https://github.com/forcedotcom/agents/compare/0.1.0...0.1.1) (2024-11-10) | ||
|
||
|
||
### Bug Fixes | ||
|
||
* export Agent class ([6c42b63](https://github.com/forcedotcom/agents/commit/6c42b63bbe9a5a5cf6fa0cea8f5649d07aaa6adc)) | ||
|
||
|
||
- export Agent class ([6c42b63](https://github.com/forcedotcom/agents/commit/6c42b63bbe9a5a5cf6fa0cea8f5649d07aaa6adc)) | ||
|
||
# [0.1.0](https://github.com/forcedotcom/agents/compare/0c5d8d6ab9e9a8470c7192a56350567882a3017b...0.1.0) (2024-11-09) | ||
|
||
|
||
### Bug Fixes | ||
|
||
* improve types and linting ([d5a6cb3](https://github.com/forcedotcom/agents/commit/d5a6cb3348e63d52e10540e99cf509be64a26649)) | ||
* revise readme and version ([f690b7f](https://github.com/forcedotcom/agents/commit/f690b7f8a911315f467f00f5f533e22e92c69a9e)) | ||
|
||
- improve types and linting ([d5a6cb3](https://github.com/forcedotcom/agents/commit/d5a6cb3348e63d52e10540e99cf509be64a26649)) | ||
- revise readme and version ([f690b7f](https://github.com/forcedotcom/agents/commit/f690b7f8a911315f467f00f5f533e22e92c69a9e)) | ||
|
||
### Features | ||
|
||
* add initial agent job spec create and mock ([0c5d8d6](https://github.com/forcedotcom/agents/commit/0c5d8d6ab9e9a8470c7192a56350567882a3017b)) | ||
|
||
|
||
|
||
- add initial agent job spec create and mock ([0c5d8d6](https://github.com/forcedotcom/agents/commit/0c5d8d6ab9e9a8470c7192a56350567882a3017b)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,190 @@ | ||
/* | ||
* Copyright (c) 2024, salesforce.com, inc. | ||
* All rights reserved. | ||
* Licensed under the BSD 3-Clause license. | ||
* For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause | ||
*/ | ||
import { Connection, Lifecycle, PollingClient, StatusResult } from '@salesforce/core'; | ||
import { Duration } from '@salesforce/kit'; | ||
import { MaybeMock } from './maybe-mock'; | ||
|
||
/** Rendering mode for the `formatted` string returned alongside test details. */
type Format = 'human' | 'json';

/** Status value carried by runs, test cases, and individual expectation results. */
type TestStatus = 'NEW' | 'IN_PROGRESS' | 'COMPLETED' | 'ERROR';

/** Body returned when an evaluation run is started. */
type AgentTestStartResponse = {
  aiEvaluationId: string;
  status: TestStatus;
};

/** Run-level fields returned by the status endpoint (and repeated on the details endpoint). */
type AgentTestStatusResponse = {
  status: TestStatus;
  startTime: string;
  endTime?: string;
  errorMessage?: string;
};

/** Data generated while a single test case ran. */
type TestCaseGeneratedData = {
  type: 'AGENT';
  actionsSequence: string[];
  outcome: 'Success' | 'Failure';
  topic: string;
  inputTokensCount: string;
  outputTokensCount: string;
};

/** Result of one expectation evaluated within a test case. */
type TestCaseExpectationResult = {
  name: string;
  actualValue: string;
  expectedValue: string;
  score: number;
  result: 'Passed' | 'Failed';
  metricLabel: 'Accuracy' | 'Precision';
  metricExplainability: string;
  status: TestStatus;
  startTime: string;
  endTime?: string;
  errorCode?: string;
  errorMessage?: string;
};

/** One test case of a run, together with its expectation results. */
type TestCaseResult = {
  status: TestStatus;
  number: string;
  startTime: string;
  endTime?: string;
  generatedData: TestCaseGeneratedData;
  expectationResults: TestCaseExpectationResult[];
};

/** Details endpoint body: the run-level status fields plus every test case. */
type AgentTestDetailsResponse = AgentTestStatusResponse & {
  testCases: TestCaseResult[];
};
|
||
/**
 * Client for starting, monitoring, inspecting, and cancelling AI evaluation runs.
 *
 * Every request goes through a `MaybeMock` built from the supplied connection.
 */
export class AgentTester {
  private maybeMock: MaybeMock;

  public constructor(connection: Connection) {
    this.maybeMock = new MaybeMock(connection);
  }

  /**
   * Starts an AI evaluation run based on the provided name or ID.
   *
   * @param nameOrId - The name or ID of the AI evaluation definition.
   * @param type - Specifies whether the provided identifier is a 'name' or 'id'. Defaults to 'name'. If 'name' is provided, nameOrId is sent as `aiEvaluationDefinitionName`. If 'id' is provided, nameOrId is sent as `aiEvaluationDefinitionVersionId`.
   * @returns A promise that resolves to an object containing the ID of the started AI evaluation run.
   */
  public async start(nameOrId: string, type: 'name' | 'id' = 'name'): Promise<{ aiEvaluationId: string }> {
    const url = '/einstein/ai-evaluations/runs';

    return this.maybeMock.request<AgentTestStartResponse>('POST', url, {
      [type === 'name' ? 'aiEvaluationDefinitionName' : 'aiEvaluationDefinitionVersionId']: nameOrId,
    });
  }

  /**
   * Fetches the run-level status of an evaluation run.
   *
   * @param jobId - ID of the evaluation run.
   * @returns The status response for the run.
   */
  public async status(jobId: string): Promise<AgentTestStatusResponse> {
    const url = `/einstein/ai-evaluations/runs/${jobId}`;

    return this.maybeMock.request<AgentTestStatusResponse>('GET', url);
  }

  /**
   * Polls an evaluation run once per second until its status is "completed" (compared
   * case-insensitively) or the timeout is reached.
   *
   * On every iteration an `AGENT_TEST_POLLING_EVENT` lifecycle event is emitted with the job ID,
   * the run status, and the total / failing / passing test-case counts.
   *
   * @param jobId - ID of the evaluation run.
   * @param options - Optional settings: `format` selects how the details are rendered (default 'human');
   * `timeout` bounds the polling (default 5 minutes).
   * @returns The run details (raw response plus formatted string) fetched after completion was observed.
   */
  public async poll(
    jobId: string,
    { format = 'human', timeout = Duration.minutes(5) }: { format?: Format; timeout?: Duration } = {}
  ): Promise<{ response: AgentTestDetailsResponse; formatted: string }> {
    const lifecycle = Lifecycle.getInstance();
    const client = await PollingClient.create({
      poll: async (): Promise<StatusResult> => {
        // NOTE: we don't actually need to call the status API here since all the same information is present on the
        // details API. We could just call the details API and check the status there.
        const [detailsResponse, statusResponse] = await Promise.all([this.details(jobId, format), this.status(jobId)]);
        const { totalTestCases, failingTestCases, passingTestCases } = countTestCaseOutcomes(
          detailsResponse.response.testCases
        );

        // The same progress event is emitted whether or not the run has finished.
        await lifecycle.emit('AGENT_TEST_POLLING_EVENT', {
          jobId,
          status: statusResponse.status,
          totalTestCases,
          failingTestCases,
          passingTestCases,
        });

        // NOTE(review): a run status of 'ERROR' does not end polling; it continues until the timeout.
        // Confirm whether 'ERROR' should be treated as terminal.
        if (statusResponse.status.toLowerCase() !== 'completed') {
          return { completed: false };
        }

        // Details are fetched again so the returned payload is read after completion was observed
        // (the first details call ran concurrently with the status call).
        return { payload: await this.details(jobId, format), completed: true };
      },
      frequency: Duration.seconds(1),
      timeout,
    });

    return client.subscribe<{ response: AgentTestDetailsResponse; formatted: string }>();
  }

  /**
   * Fetches the full details of an evaluation run and renders them.
   *
   * @param jobId - ID of the evaluation run.
   * @param format - 'human' renders with `humanFormat`; any other value renders with `jsonFormat`. Defaults to 'human'.
   * @returns The raw details response and its formatted string.
   */
  public async details(
    jobId: string,
    format: Format = 'human'
  ): Promise<{ response: AgentTestDetailsResponse; formatted: string }> {
    const url = `/einstein/ai-evaluations/runs/${jobId}/details`;

    const response = await this.maybeMock.request<AgentTestDetailsResponse>('GET', url);
    return {
      response,
      formatted: format === 'human' ? await humanFormat(jobId, response) : await jsonFormat(response),
    };
  }

  /**
   * Requests cancellation of an evaluation run.
   *
   * @param jobId - ID of the evaluation run.
   * @returns The `{ success }` body returned by the cancel endpoint.
   */
  public async cancel(jobId: string): Promise<{ success: boolean }> {
    const url = `/einstein/ai-evaluations/runs/${jobId}/cancel`;

    return this.maybeMock.request<{ success: boolean }>('POST', url);
  }
}

/**
 * Tallies test-case outcomes for the polling event.
 *
 * - passing: status is COMPLETED and every expectation result is 'Passed'
 * - failing: status is ERROR, or status is COMPLETED with at least one expectation that is not 'Passed'
 * - test cases in any other status count towards the total only
 *
 * Status is compared case-insensitively, matching how the run status is compared while polling.
 */
function countTestCaseOutcomes(testCases: TestCaseResult[]): {
  totalTestCases: number;
  failingTestCases: number;
  passingTestCases: number;
} {
  let failingTestCases = 0;
  let passingTestCases = 0;

  for (const testCase of testCases) {
    const status = testCase.status.toLowerCase();
    if (status === 'error') {
      failingTestCases += 1;
    } else if (status === 'completed') {
      if (testCase.expectationResults.every((r) => r.result === 'Passed')) {
        passingTestCases += 1;
      } else {
        failingTestCases += 1;
      }
    }
  }

  return { totalTestCases: testCases.length, failingTestCases, passingTestCases };
}
|
||
/**
 * Renders run details as one table per test case, joined by newlines.
 *
 * Each table is titled `Test Case #<number>` and has one row per expectation result.
 *
 * @param name - Accepted for callers but not read by this function.
 * @param details - Details response whose test cases are rendered.
 * @returns The concatenated tables; an empty string when there are no test cases.
 */
export async function humanFormat(name: string, details: AgentTestDetailsResponse): Promise<string> {
  const { Ux } = await import('@salesforce/sf-plugins-core');
  const ux = new Ux();

  // Runtime in milliseconds; reported as 0 when no end time is present.
  const elapsedMs = (startTime: string, endTime?: string): number =>
    endTime ? new Date(endTime).getTime() - new Date(startTime).getTime() : 0;

  const renderedTables = details.testCases.map((testCase) => {
    const rows = testCase.expectationResults.map((expectation) => ({
      name: expectation.name,
      outcome: expectation.result === 'Passed' ? 'Pass' : 'Fail',
      actualValue: expectation.actualValue,
      expectedValue: expectation.expectedValue,
      score: expectation.score,
      'metric label': expectation.metricLabel,
      message: expectation.errorMessage ?? '',
      'runtime (MS)': elapsedMs(expectation.startTime, expectation.endTime),
    }));

    return ux.makeTable({ title: `Test Case #${testCase.number}`, data: rows });
  });

  return renderedTables.join('\n');
}
|
||
/**
 * Serializes run details as pretty-printed JSON with a two-space indent.
 *
 * @param details - Details response to serialize.
 * @returns A promise resolving to the JSON text.
 */
export async function jsonFormat(details: AgentTestDetailsResponse): Promise<string> {
  const serialized = JSON.stringify(details, null, 2);
  // Promise-returning like humanFormat, so callers can await either formatter the same way.
  return Promise.resolve(serialized);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.