Skip to content

Commit

Permalink
fix: update expected response from details API
Browse files: browse the repository at this point in the history
  • Loading branch information
mdonnalley committed Dec 16, 2024
1 parent 07cdc22 commit 49a19fd
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 102 deletions.
28 changes: 15 additions & 13 deletions src/agentTester.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,10 @@ export type AgentTestDetailsResponse = {
endTime?: string;
errorMessage?: string;
subjectName: string;
- testSetName: string;
- testCases: TestCaseResult[];
+ testSet: {
+ name: string;
+ testCases: TestCaseResult[];
+ };
};

/**
Expand Down Expand Up @@ -120,9 +122,9 @@ export class AgentTester {
// NOTE: we don't actually need to call the status API here since all the same information is present on the
// details API. We could just call the details API and check the status there.
const [detailsResponse, statusResponse] = await Promise.all([this.details(jobId), this.status(jobId)]);
- const totalTestCases = detailsResponse.testCases.length;
- const failingTestCases = detailsResponse.testCases.filter((tc) => tc.status === 'ERROR').length;
- const passingTestCases = detailsResponse.testCases.filter(
+ const totalTestCases = detailsResponse.testSet.testCases.length;
+ const failingTestCases = detailsResponse.testSet.testCases.filter((tc) => tc.status === 'ERROR').length;
+ const passingTestCases = detailsResponse.testSet.testCases.filter(
(tc) => tc.status === 'COMPLETED' && tc.expectationResults.every((r) => r.result === 'Passed')
).length;

Expand Down Expand Up @@ -183,7 +185,7 @@ export async function humanFormat(details: AgentTestDetailsResponse): Promise<st
const ux = new Ux();

const tables: string[] = [];
- for (const testCase of details.testCases) {
+ for (const testCase of details.testSet.testCases) {
const table = ux.makeTable({
title: `Test Case #${testCase.number}`,
data: testCase.expectationResults.map((r) => ({
Expand Down Expand Up @@ -215,9 +217,9 @@ export async function junitFormat(details: AgentTestDetailsResponse): Promise<st
ignoreAttributes: false,
});

- const testCount = details.testCases.length;
- const failureCount = details.testCases.filter((tc) => tc.status === 'ERROR').length;
- const time = details.testCases.reduce((acc, tc) => {
+ const testCount = details.testSet.testCases.length;
+ const failureCount = details.testSet.testCases.filter((tc) => tc.status === 'ERROR').length;
+ const time = details.testSet.testCases.reduce((acc, tc) => {
if (tc.endTime && tc.startTime) {
return acc + new Date(tc.endTime).getTime() - new Date(tc.startTime).getTime();
}
Expand All @@ -235,13 +237,13 @@ export async function junitFormat(details: AgentTestDetailsResponse): Promise<st
{ $name: 'start-time', $value: details.startTime },
{ $name: 'end-time', $value: details.endTime },
],
- testsuite: details.testCases.map((testCase) => {
+ testsuite: details.testSet.testCases.map((testCase) => {
const testCaseTime = testCase.endTime
? new Date(testCase.endTime).getTime() - new Date(testCase.startTime).getTime()
: 0;

return {
- $name: `${details.testSetName}.${testCase.number}`,
+ $name: `${details.testSet.name}.${testCase.number}`,
$time: testCaseTime,
$assertions: testCase.expectationResults.length,
failure: testCase.expectationResults
Expand All @@ -262,11 +264,11 @@ export async function junitFormat(details: AgentTestDetailsResponse): Promise<st
export async function tapFormat(details: AgentTestDetailsResponse): Promise<string> {
const lines: string[] = [];
let expectationCount = 0;
- for (const testCase of details.testCases) {
+ for (const testCase of details.testSet.testCases) {
for (const result of testCase.expectationResults) {
const status = result.result === 'Passed' ? 'ok' : 'not ok';
expectationCount++;
- lines.push(`${status} ${expectationCount} ${details.testSetName}.${testCase.number}`);
+ lines.push(`${status} ${expectationCount} ${details.testSet.name}.${testCase.number}`);
if (status === 'not ok') {
lines.push(' ---');
lines.push(` message: ${result.errorMessage ?? 'Unknown error'}`);
Expand Down
2 changes: 1 addition & 1 deletion test/agentTester.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ describe('AgentTester', () => {
const response = await tester.poll('4KBSM000000003F4AQ');
expect(response).to.be.ok;
// TODO: make these assertions more meaningful
- expect(response.testCases[0].status).to.equal('COMPLETED');
+ expect(response.testSet.testCases[0].status).to.equal('COMPLETED');
});
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,95 +4,97 @@
"endTime": "2024-11-28T12:05:00Z",
"errorMessage": null,
"subjectName": "Copilot_for_Salesforce",
"testSetName": "CRM_Sanity_v1",
"testCases": [
{
"status": "COMPLETED",
"number": 1,
"startTime": "2024-11-28T12:00:10Z",
"endTime": "2024-11-28T12:00:20Z",
"generatedData": {
"type": "AGENT",
"actionsSequence": ["Action1", "Action2"],
"outcome": "Success",
"topic": "Mathematics",
"inputTokensCount": 50,
"outputTokensCount": 55
},
"expectationResults": [
{
"name": "topic_sequence_match",
"actualValue": "Result A",
"expectedValue": "Result A",
"score": 1.0,
"result": "Passed",
"metricLabel": "Accuracy",
"metricExplainability": "Measures the correctness of the result.",
"status": "Completed",
"startTime": "2024-11-28T12:00:12Z",
"endTime": "2024-11-28T12:00:13Z",
"errorCode": null,
"errorMessage": null
"testSet": {
"name": "CRM_Sanity_v1",
"testCases": [
{
"status": "COMPLETED",
"number": 1,
"startTime": "2024-11-28T12:00:10Z",
"endTime": "2024-11-28T12:00:20Z",
"generatedData": {
"type": "AGENT",
"actionsSequence": ["Action1", "Action2"],
"outcome": "Success",
"topic": "Mathematics",
"inputTokensCount": 50,
"outputTokensCount": 55
},
{
"name": "action_sequence_match",
"actualValue": "Result B",
"expectedValue": "Result B",
"score": 0.9,
"result": "Passed",
"metricLabel": "Precision",
"metricExplainability": "Measures the precision of the result.",
"status": "Completed",
"startTime": "2024-11-28T12:00:14Z",
"endTime": "2024-11-28T12:00:15Z",
"errorCode": null,
"errorMessage": null
}
]
},
{
"status": "ERROR",
"number": 2,
"startTime": "2024-11-28T12:00:30Z",
"endTime": "2024-11-28T12:00:40Z",
"generatedData": {
"type": "AGENT",
"actionsSequence": ["Action3", "Action4"],
"outcome": "Failure",
"topic": "Physics",
"inputTokensCount": 60,
"outputTokensCount": 50
"expectationResults": [
{
"name": "topic_sequence_match",
"actualValue": "Result A",
"expectedValue": "Result A",
"score": 1.0,
"result": "Passed",
"metricLabel": "Accuracy",
"metricExplainability": "Measures the correctness of the result.",
"status": "Completed",
"startTime": "2024-11-28T12:00:12Z",
"endTime": "2024-11-28T12:00:13Z",
"errorCode": null,
"errorMessage": null
},
{
"name": "action_sequence_match",
"actualValue": "Result B",
"expectedValue": "Result B",
"score": 0.9,
"result": "Passed",
"metricLabel": "Precision",
"metricExplainability": "Measures the precision of the result.",
"status": "Completed",
"startTime": "2024-11-28T12:00:14Z",
"endTime": "2024-11-28T12:00:15Z",
"errorCode": null,
"errorMessage": null
}
]
},
"expectationResults": [
{
"name": "topic_sequence_match",
"actualValue": "Result C",
"expectedValue": "Result D",
"score": 0.5,
"result": "Failed",
"metricLabel": "Accuracy",
"metricExplainability": "Measures the correctness of the result.",
"status": "Completed",
"startTime": "2024-11-28T12:00:32Z",
"endTime": "2024-11-28T12:00:33Z",
"errorCode": null,
"errorMessage": "Expected \"Result D\" but got \"Result C\"."
{
"status": "ERROR",
"number": 2,
"startTime": "2024-11-28T12:00:30Z",
"endTime": "2024-11-28T12:00:40Z",
"generatedData": {
"type": "AGENT",
"actionsSequence": ["Action3", "Action4"],
"outcome": "Failure",
"topic": "Physics",
"inputTokensCount": 60,
"outputTokensCount": 50
},
{
"name": "topic_sequence_match",
"actualValue": "Result C",
"expectedValue": "Result D",
"score": 0.5,
"result": "Failed",
"metricLabel": "Accuracy",
"metricExplainability": "Measures the correctness of the result.",
"status": "Completed",
"startTime": "2024-11-28T12:00:32Z",
"endTime": "2024-11-28T12:00:33Z",
"errorCode": null,
"errorMessage": "Expected \"Result D\" but got \"Result C\"."
}
]
}
]
"expectationResults": [
{
"name": "topic_sequence_match",
"actualValue": "Result C",
"expectedValue": "Result D",
"score": 0.5,
"result": "Failed",
"metricLabel": "Accuracy",
"metricExplainability": "Measures the correctness of the result.",
"status": "Completed",
"startTime": "2024-11-28T12:00:32Z",
"endTime": "2024-11-28T12:00:33Z",
"errorCode": null,
"errorMessage": "Expected \"Result D\" but got \"Result C\"."
},
{
"name": "topic_sequence_match",
"actualValue": "Result C",
"expectedValue": "Result D",
"score": 0.5,
"result": "Failed",
"metricLabel": "Accuracy",
"metricExplainability": "Measures the correctness of the result.",
"status": "Completed",
"startTime": "2024-11-28T12:00:32Z",
"endTime": "2024-11-28T12:00:33Z",
"errorCode": null,
"errorMessage": "Expected \"Result D\" but got \"Result C\"."
}
]
}
]
}
}

0 comments on commit 49a19fd

Please sign in to comment.