Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow filtering samples by compound expressions including multiple scorers #1073

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- Inspect View: display tool error messages in transcript when tool errors occur.
- Inspect View: display any completed samples even if the task fails because of an error
- Inspect View: don't display the 'input' column heading if there isn't an input
- Inspect View: allow filtering samples by compound expressions including multiple scorers.
- Open AI: Handle additional bad request status codes (mapping them to appropriate `StopReason`)
- Open AI: Use new `max_completion_tokens` option for o1 full.
- Web Browser: raise error when both `error` and `web_at` fields are present in response.
Expand Down
27 changes: 27 additions & 0 deletions src/inspect_ai/_view/www/App.css
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,29 @@ table.table.table-sm td {
height: auto !important;
}

/* CSS-only tooltip: an element carrying a data-tooltip attribute becomes the
   positioning context for the bubble generated below. */
[data-tooltip] {
position: relative;
}
/* While the element is hovered, show the attribute's text in an ::after
   pseudo-element positioned relative to the element. */
[data-tooltip]:hover::after {
content: attr(data-tooltip);
position: absolute;
line-height: 1.25;
background: var(--bs-gray-300);
color: var(--bs-body-color);
padding: 4px 8px;
border-radius: 4px;
border: 1px solid var(--bs-gray-400);
box-shadow: 0 2px 10px 0 rgba(0, 0, 0, 0.25);
/* Keep line breaks from the attribute text while still wrapping long lines. */
white-space: pre-wrap;
/* Size to the content, capped at 400px. */
width: max-content;
max-width: 400px;
z-index: 1000;
}
/* "bottom-left" placement: the bubble sits directly below the element with its
   right edge aligned to the element's right edge, so it extends to the left. */
[data-tooltip][data-tooltip-position="bottom-left"]:hover::after {
right: 0%;
top: 100%;
}

/* ANSI Coloring */
.ansi-display {
font-family: monospace;
Expand Down Expand Up @@ -706,6 +729,10 @@ from {
overflow: unset;
}

/* Highlight a custom dropdown item while it is hovered, using the
   --bs-secondary-bg theme variable as the background. */
.custom-dropdown-item:hover {
background-color: var(--bs-secondary-bg);
}

pre[class*="language-"].tool-output,
.tool-output {
background-color: #f8f8f8;
Expand Down
27 changes: 27 additions & 0 deletions src/inspect_ai/_view/www/dist/assets/index.css
Original file line number Diff line number Diff line change
Expand Up @@ -14923,6 +14923,29 @@ table.table.table-sm td {
height: auto !important;
}

[data-tooltip] {
position: relative;
}
[data-tooltip]:hover::after {
content: attr(data-tooltip);
position: absolute;
line-height: 1.25;
background: var(--bs-gray-300);
color: var(--bs-body-color);
padding: 4px 8px;
border-radius: 4px;
border: 1px solid var(--bs-gray-400);
box-shadow: 0 2px 10px 0 rgba(0, 0, 0, 0.25);
white-space: pre-wrap;
width: max-content;
max-width: 400px;
z-index: 1000;
}
[data-tooltip][data-tooltip-position="bottom-left"]:hover::after {
right: 0%;
top: 100%;
}

/* ANSI Coloring */
.ansi-display {
font-family: monospace;
Expand Down Expand Up @@ -14979,6 +15002,10 @@ from {
overflow: unset;
}

.custom-dropdown-item:hover {
background-color: var(--bs-secondary-bg);
}

pre[class*="language-"].tool-output,
.tool-output {
background-color: #f8f8f8;
Expand Down
26,694 changes: 24,667 additions & 2,027 deletions src/inspect_ai/_view/www/dist/assets/index.js

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions src/inspect_ai/_view/www/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,10 @@
"bootstrap": "^5.3.3",
"bootstrap-icons": "^1.11.3",
"clipboard": "^2.0.11",
"codemirror": "^6.0.1",
"fast-json-patch": "^3.1.1",
"fflate": "^0.8.2",
"filtrex": "^3.1.0",
"htm": "^3.1.1",
"json": "^11.0.0",
"json5": "^2.2.3",
Expand Down
18 changes: 8 additions & 10 deletions src/inspect_ai/_view/www/src/App.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ import {
} from "./samples/SamplesDescriptor.mjs";
import { byEpoch, bySample, sortSamples } from "./samples/tools/SortFilter.mjs";
import { resolveAttachments } from "./utils/attachments.mjs";
import { filterFnForType } from "./samples/tools/filters.mjs";
import { filterSamples } from "./samples/tools/filters.mjs";

import {
kEvalWorkspaceTabId,
Expand Down Expand Up @@ -308,21 +308,19 @@ export function App({

useEffect(() => {
const samples = selectedLog?.contents?.sampleSummaries || [];
const filtered = samples.filter((sample) => {
const { result: prefiltered } = filterSamples(
evalDescriptor,
samples,
filter?.value,
);
const filtered = prefiltered.filter((sample) => {
// Filter by epoch if specified
if (epoch && epoch !== "all") {
if (epoch !== sample.epoch + "") {
return false;
}
}

// Apply the filter
const filterFn = filterFnForType(filter);
if (filterFn && filter.value) {
return filterFn(samplesDescriptor, sample, filter.value);
} else {
return true;
}
return true;
});

// Sort the samples
Expand Down
1 change: 0 additions & 1 deletion src/inspect_ai/_view/www/src/Types.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
/**
* @typedef {Object} ScoreFilter
* @property {string} [value]
* @property {string} [type]
*/

/**
Expand Down
3 changes: 3 additions & 0 deletions src/inspect_ai/_view/www/src/navbar/Navbar.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import { SecondaryBar } from "./SecondaryBar.mjs";
* @param {import("../types/log").EvalPlan} [props.evalPlan] - The EvalSpec
* @param {import("../types/log").EvalStats} [props.evalStats] - The EvalStats
* @param {import("../api/Types.mjs").SampleSummary[]} [props.samples] - the samples
* @param {import("../samples/SamplesDescriptor.mjs").EvalDescriptor} [props.evalDescriptor] - The EvalDescriptor
* @param {string} [props.status] - the status
* @param {boolean} props.offcanvas - Are we in offcanvas mode?
* @param {boolean} props.showToggle - Should we show the toggle?
Expand All @@ -32,6 +33,7 @@ export const Navbar = ({
evalResults,
evalStats,
samples,
evalDescriptor,
showToggle,
offcanvas,
status,
Expand Down Expand Up @@ -182,6 +184,7 @@ export const Navbar = ({
evalResults=${evalResults}
evalStats=${evalStats}
samples=${samples}
evalDescriptor=${evalDescriptor}
status=${status}
style=${{ gridColumn: "1/-1" }}
/>
Expand Down
45 changes: 27 additions & 18 deletions src/inspect_ai/_view/www/src/navbar/SecondaryBar.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { html } from "htm/preact";
import { LabeledValue } from "../components/LabeledValue.mjs";
import { formatDataset, formatDuration } from "../utils/Format.mjs";
import { ExpandablePanel } from "../components/ExpandablePanel.mjs";
import { scoreFilterItems } from "../samples/tools/filters.mjs";

/**
* Renders the SecondaryBar
Expand All @@ -13,6 +14,7 @@ import { ExpandablePanel } from "../components/ExpandablePanel.mjs";
* @param {import("../types/log").EvalResults} [props.evalResults] - The EvalResults
* @param {import("../types/log").EvalStats} [props.evalStats] - The EvalStats
* @param {import("../api/Types.mjs").SampleSummary[]} [props.samples] - the samples
* @param {import("../samples/SamplesDescriptor.mjs").EvalDescriptor} [props.evalDescriptor] - The EvalDescriptor
* @param {string} [props.status] - the status
* @param {Map<string, string>} [props.style] - additional styles merged into the panel's style
*
Expand All @@ -24,6 +26,7 @@ export const SecondaryBar = ({
evalResults,
evalStats,
samples,
evalDescriptor,
status,
style,
}) => {
Expand Down Expand Up @@ -56,19 +59,10 @@ export const SecondaryBar = ({
`,
});

const label = evalResults?.scores.length > 1 ? "Scorers" : "Scorer";
values.push({
size: "minmax(12%, auto)",
value: html`<${LabeledValue} label="${label}" style=${staticColStyle} style=${{ justifySelf: hasConfig ? "left" : "center" }}>
<${ScorerSummary}
scorers=${evalResults?.scores} />
</${LabeledValue}>`,
});

if (hasConfig) {
values.push({
size: "minmax(12%, auto)",
value: html`<${LabeledValue} label="Config" style=${{ justifySelf: "right" }}>
value: html`<${LabeledValue} label="Config" style=${{ justifySelf: "center" }}>
<${ParamSummary} params=${hyperparameters}/>
</${LabeledValue}>`,
});
Expand All @@ -81,11 +75,20 @@ export const SecondaryBar = ({
values.push({
size: "minmax(12%, auto)",
value: html`
<${LabeledValue} label="Duration" style=${{ justifySelf: "right" }}>
<${LabeledValue} label="Duration" style=${{ justifySelf: "center" }}>
${totalDuration}
</${LabeledValue}>`,
});

const label = evalResults?.scores.length > 1 ? "Scorers" : "Scorer";
values.push({
size: "minmax(12%, auto)",
value: html`<${LabeledValue} label="${label}" style=${staticColStyle} style=${{ justifySelf: "right" }}>
<${ScorerSummary}
evalDescriptor=${evalDescriptor} />
</${LabeledValue}>`,
});

return html`
<${ExpandablePanel} style=${{ margin: "0", ...style }} collapse=${true} lines=${4}>
<div
Expand Down Expand Up @@ -124,17 +127,23 @@ const DatasetSummary = ({ dataset, samples, epochs, style }) => {
`;
};

const ScorerSummary = ({ scorers }) => {
if (!scorers) {
const ScorerSummary = ({ evalDescriptor }) => {
if (!evalDescriptor) {
return "";
}

const uniqScorers = new Set();
scorers.forEach((scorer) => {
uniqScorers.add(scorer.name);
});
const items = scoreFilterItems(evalDescriptor);

return Array.from(uniqScorers).join(", ");
return html`
<span style=${{ position: "relative" }}>
${Array.from(items).map(
(item, index) => html`
${index > 0 ? ", " : ""}
<span title=${item.tooltip}>${item.canonicalName}</span>
`,
)}
</span>
`;
};

/**
Expand Down
36 changes: 24 additions & 12 deletions src/inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,29 @@ import {
* @property {number} normalized.limit - Normalized size of the limit message.
*/

/**
 * Builds the string key that identifies a score label.
 *
 * @param {import("../Types.mjs").ScoreLabel | undefined} scoreLabel - the label to encode
 * @returns {string} `<scorer>.<name>` for a label, or the placeholder
 *   "No score key" when no label is supplied
 */
export const scoreLabelKey = (scoreLabel) =>
  scoreLabel ? `${scoreLabel.scorer}.${scoreLabel.name}` : "No score key";

/**
 * Parses a key produced by `scoreLabelKey` back into a score label.
 *
 * The key is split at the FIRST "." only: everything before it is the scorer
 * and everything after it is the score name. A name that itself contains dots
 * (for example "f1.macro") is therefore kept whole rather than cut off at the
 * second dot.
 *
 * @param {string} key - a key in `<scorer>.<name>` form, or "No score key"
 * @returns {import("../Types.mjs").ScoreLabel | undefined} the parsed label,
 *   or undefined for the "No score key" placeholder. When the key has no ".",
 *   the whole key is returned as the scorer and `name` is undefined.
 */
export const parseScoreLabelKey = (key) => {
  if (key === "No score key") {
    return undefined;
  }

  const separatorIndex = key.indexOf(".");
  if (separatorIndex === -1) {
    // No separator: there is no name part to recover.
    return { scorer: key, name: undefined };
  }

  return {
    scorer: key.slice(0, separatorIndex),
    name: key.slice(separatorIndex + 1),
  };
};

/**
* @param {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores
* @param {import("../api/Types.mjs").SampleSummary[]} samples - the list of sample summaries
Expand Down Expand Up @@ -165,17 +188,6 @@ export const createEvalDescriptor = (scores, samples, epochs) => {
return undefined;
};

/**
* @param {import("../Types.mjs").ScoreLabel} [scoreLabel]
* @returns {string}
*/
const scoreLabelKey = (scoreLabel) => {
if (!scoreLabel) {
return "No score key";
}
return `${scoreLabel.scorer}.${scoreLabel.name}`;
};

/**
* The EvalDescriptor is memoized. Compute all descriptors now to avoid duplicate work.
* @type {Map<string, ScoreDescriptor>}
Expand Down Expand Up @@ -462,7 +474,7 @@ const scoreCategorizers = [
* @returns {ScoreDescriptor} a ScoreDescriptor
*/
describe: (values, types) => {
if (values.length === 2 && types.length === 1 && types[0] === "boolean") {
if (types.length === 1 && types[0] === "boolean") {
return booleanScoreCategorizer();
}
},
Expand Down
16 changes: 8 additions & 8 deletions src/inspect_ai/_view/www/src/samples/SamplesTools.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ export const SampleTools = (props) => {
const hasEpochs = epochs > 1;
const tools = [];

tools.push(
html`<${SampleFilter}
evalDescriptor=${sampleDescriptor.evalDescriptor}
filter=${filter}
filterChanged=${filterChanged}
/>`,
);

if (scores.length > 1) {
tools.push(
html`<${SelectScorer}
Expand All @@ -43,14 +51,6 @@ export const SampleTools = (props) => {
);
}

tools.push(
html`<${SampleFilter}
filter=${filter}
filterChanged=${filterChanged}
descriptor=${sampleDescriptor}
/>`,
);

tools.push(
html`<${SortFilter}
sampleDescriptor=${sampleDescriptor}
Expand Down
Loading
Loading