Skip to content

Commit

Permalink
sync 01-05-24
Browse files Browse the repository at this point in the history
  • Loading branch information
aisi-inspect committed May 1, 2024
1 parent 6cfb4fe commit b7dac03
Show file tree
Hide file tree
Showing 19 changed files with 178 additions and 95 deletions.
7 changes: 5 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# Changelog

## v0.3.4 (Unreleased)
## v0.3.4 (01 May 2024)

- `write_eval_log()` now ignores unserializable objects in metadata fields.
- `read_eval_log()` now takes a `str` or `FileInfo` (for compatibility w/ list returned from `list_eval_logs()`).
- Registry name lookups are now case sensitive (fixes issue w/ loading tasks w/ mixed case names).
- Resilience to Python syntax errors that occur when enumerating tasks in a directory.
- Do not throw error if unable to parse or load `.ipynb` file due to lack of dependencies (e.g. `nbformat`).
- Several small improvements to markdown rendering in log viewer (don't render intraword underscores, escape html tags).
- Various additions to log viewer display (log file name, dataset/scorer in listing, filter by complex score types).
- Improvements to markdown rendering in log viewer (don't render intraword underscores, escape html tags).

## v0.3.3 (28 April 2024)

Expand Down
4 changes: 2 additions & 2 deletions docs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ book:
repo-actions: [issue]
downloads: [pdf, epub, docx]
twitter-card:
description: "A framework for large language model evaluations"
description: "Open-source framework for large language model evaluations"
open-graph:
description: "A framework for large language model evaluations"
description: "Open-source framework for large language model evaluations"
sidebar:
header: >
[![](images/aisi-logo.png)](https://www.gov.uk/government/organisations/ai-safety-institute)
Expand Down
9 changes: 8 additions & 1 deletion docs/theme.scss
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,11 @@

.splash ul {
padding-inline-start: 1rem;
}
}

@media(max-width: 991.98px) {
.sidebar-header-item .img-fluid {
max-width: 195px;
}
}

54 changes: 31 additions & 23 deletions src/inspect_ai/_eval/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,17 +248,21 @@ def exec_filter(cells: list[str]) -> bool:


def code_has_task(code: str) -> bool:
tree = ast.parse(code)
for node in ast.iter_child_nodes(tree):
if isinstance(node, ast.FunctionDef):
for decorator in node.decorator_list:
if isinstance(decorator, ast.Name):
if str(decorator.id) == "task":
return True
elif isinstance(decorator, ast.Call):
if isinstance(decorator.func, ast.Name):
if str(decorator.func.id) == "task":
try:
tree = ast.parse(code)
for node in ast.iter_child_nodes(tree):
if isinstance(node, ast.FunctionDef):
for decorator in node.decorator_list:
if isinstance(decorator, ast.Name):
if str(decorator.id) == "task":
return True
elif isinstance(decorator, ast.Call):
if isinstance(decorator.func, ast.Name):
if str(decorator.func.id) == "task":
return True
except SyntaxError:
pass

return False


Expand All @@ -283,20 +287,24 @@ def parse_tasks(path: Path, root_dir: Path, absolute: bool) -> list[TaskInfo]:

# parse the top level tasks out of the code
tasks: list[TaskInfo] = []
tree = ast.parse(code)
for node in ast.iter_child_nodes(tree):
if isinstance(node, ast.FunctionDef):
for decorator in node.decorator_list:
result = parse_decorator(node, decorator)
if result:
name, attribs = result
tasks.append(
TaskInfo(
file=task_path(path, root_dir, absolute),
name=name,
attribs=attribs,
try:
tree = ast.parse(code)
for node in ast.iter_child_nodes(tree):
if isinstance(node, ast.FunctionDef):
for decorator in node.decorator_list:
result = parse_decorator(node, decorator)
if result:
name, attribs = result
tasks.append(
TaskInfo(
file=task_path(path, root_dir, absolute),
name=name,
attribs=attribs,
)
)
)
except SyntaxError:
pass

return tasks


Expand Down
16 changes: 12 additions & 4 deletions src/inspect_ai/_util/notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

from IPython import get_ipython # type: ignore
from IPython.core.interactiveshell import InteractiveShell
from nbformat import read
from nbformat import NBFormatError, ValidationError, read
from nbformat.reader import NotJSONError

# from https://jupyter-notebook.readthedocs.io/en/stable/examples/Notebook/Importing%20Notebooks.html

Expand Down Expand Up @@ -64,9 +65,16 @@ def load_module(self, fullname: str) -> types.ModuleType:


def read_notebook_code(path: Path) -> str:
# load the notebook object
with io.open(path, "r", encoding="utf-8") as f:
nb = read(f, 4) # type: ignore
try:
# load the notebook object
with io.open(path, "r", encoding="utf-8") as f:
nb = read(f, 4) # type: ignore
except NotJSONError:
return ""
except ValidationError:
return ""
except NBFormatError:
return ""

# for dealing w/ magics
shell = InteractiveShell.instance()
Expand Down
2 changes: 1 addition & 1 deletion src/inspect_ai/_util/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def registry_name(o: object, name: str) -> str:
and if it is, preprends the package name as a namespace
"""
package = get_package_name(o)
return (f"{package}/{name}" if package else name).lower()
return f"{package}/{name}" if package else name


def registry_lookup(type: RegistryType, name: str) -> object | None:
Expand Down
38 changes: 25 additions & 13 deletions src/inspect_ai/_view/www/App.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import "./src/Register.mjs";
import { icons } from "./src/Constants.mjs";
import { WorkSpace } from "./src/workspace/WorkSpace.mjs";
import { eval_log } from "./api.mjs";
import { CopyButton } from "./src/components/CopyButton.mjs";

export function App() {
const [selected, setSelected] = useState(0);
Expand Down Expand Up @@ -89,7 +90,7 @@ export function App() {
const appEnvelope = fullScreen
? ""
: html`
<${Header} logs=${logs} offcanvas=${offcanvas} />
<${Header} logs=${logs} selected=${selected} offcanvas=${offcanvas} />
<${Sidebar}
logs=${logs}
logHeaders=${logHeaders}
Expand Down Expand Up @@ -123,6 +124,14 @@ export function App() {
const Header = (props) => {
const toggleOffCanClass = props.offcanvas ? "" : " d-md-none";
const gearOffCanClass = props.offcanvas ? "" : " d-md-inline";

const logFiles = props.logs.files || [];
const logSelected = props.selected || 0;
const logUri = logFiles.length > logSelected ? logFiles[logSelected].name : "";
const logName =logUri.split('/').pop();



return html`
<nav class="navbar sticky-top shadow-sm" style=${{ flexWrap: "nowrap" }}>
<div class="container-fluid">
Expand Down Expand Up @@ -152,19 +161,19 @@ const Header = (props) => {
></i>
<span> Inspect View </span>
</span>
<span
<div
class="navbar-text"
style=${{
paddingTop: "0.3rem",
paddingBottom: 0,
fontSize: "1rem",
fontSize: "0.8rem",
whiteSpace: "nowrap",
textOverflow: "ellipsis",
overflow: "hidden",
}}
>
${props.logs.log_dir}
</span>
${logName}<${CopyButton} value=${logUri}/>
</div>
</div>
</nav>
`;
Expand Down Expand Up @@ -218,6 +227,9 @@ const Sidebar = (props) => {
...logHeader.eval?.task_args,
} : undefined;
const model = logHeader?.eval?.model;
const dataset = logHeader?.eval?.dataset;
const scorer = logHeader?.results?.scorer?.name;
return html`
<li
Expand All @@ -235,7 +247,7 @@ const Sidebar = (props) => {
<div>
<div
style=${{
fontSize: "1.4em",
fontSize: "1.5em",
fontWeight: "600",
}}
>
Expand All @@ -249,11 +261,7 @@ const Sidebar = (props) => {
})}
</small>
${logHeader?.eval?.model
? html` <div>
<small> ${logHeader?.eval.model} </small>
</div>`
: ""}
${model ? html` <div><small class="mb-1 text-muted">${model}</small></div>`: ""}
</div>
${logHeader?.results?.metrics
? html`<div style=${{display: "flex", flexDirection: "row", flexWrap: "wrap", justifyContent: "flex-end" }}>
Expand All @@ -274,7 +282,7 @@ const Sidebar = (props) => {
>
${logHeader?.results.metrics[metric].name}
</div>
<div style=${{fontWeight: 600, fontSize: "1.4em"}}>
<div style=${{fontWeight: 600, fontSize: "1.5em"}}>
${formatPrettyDecimal(
logHeader?.results.metrics[metric].value
)}
Expand All @@ -286,13 +294,17 @@ const Sidebar = (props) => {
</div>`
: logHeader?.status === "error" ? html`<div style=${{color: "var(--bs-danger)"}}>Eval Error</div>` : ""}
</div>
<small style=${{ marginTop: "0.4em" }}>
<div style=${{ marginTop: "0.4em" }}>
<small class="mb-1 text-muted">
${
hyperparameters ? Object.keys((hyperparameters)).map((key) => {
return `${key}: ${hyperparameters[key]}`
}).join(", ") : ""
}
</small>
</div>
${dataset || scorer ? html`<div style=${{display: "flex", justifyContent: "space-between", marginTop: "0.5em" }}><span>dataset: ${dataset.name || "(samples)"}</span><span>scorer: ${scorer}</span></div>` : ""}
</li>
`;
})}
Expand Down
2 changes: 1 addition & 1 deletion src/inspect_ai/_view/www/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
<script src="./libs/showdown.min.js"></script>
<script>
window.document.addEventListener("DOMContentLoaded", function (_event) {
new ClipboardJS(".clipboard-button");
new ClipboardJS(".clipboard-button,.copy-button");
});
</script>
</body>
Expand Down
2 changes: 1 addition & 1 deletion src/inspect_ai/_view/www/src/Constants.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ export const icons = {
"close": "bi bi-x",
config: "bi bi-gear",
confirm: "bi bi-check",
copy: "bi bi-clipboard",
copy: "bi bi-copy",

epoch: (epoch) => {
return `bi bi-${epoch}-circle`;
Expand Down
22 changes: 22 additions & 0 deletions src/inspect_ai/_view/www/src/components/CopyButton.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import { html } from "htm/preact";
import { icons } from "../Constants.mjs";

/**
 * Borderless icon button that exposes `value` for copying to the clipboard.
 *
 * The button only carries the `copy-button` class and a
 * `data-clipboard-text` attribute; the copy itself is expected to be done by
 * a clipboard listener bound to that class elsewhere in the page
 * (NOTE(review): presumably ClipboardJS — confirm against the page setup).
 * On click the icon is briefly swapped to the "confirm" icon as feedback and
 * restored after 1.25 seconds.
 *
 * @param {{ value: string }} props - `value` is the text placed in
 *   `data-clipboard-text`.
 * @returns The rendered button.
 */
export const CopyButton = ({ value }) => {
  return html`<button
    class="copy-button"
    style=${{ border: "none", backgroundColor: "inherit", opacity: "0.5" }}
    data-clipboard-text=${value}
    onclick=${(e) => {
      // Resolve the icon from the button that owns this handler rather than
      // from `e.target`. `e.target` is whatever element was actually clicked;
      // when that is the <button> itself (e.g. a click on its padding),
      // overwriting its className would strip the `copy-button` class and
      // the clipboard selector would stop matching this button.
      const iEl = e.currentTarget.querySelector("i");
      if (iEl) {
        iEl.className = `${icons.confirm} primary`;
        setTimeout(() => {
          iEl.className = icons.copy;
        }, 1250);
      }
      return false;
    }}
  >
    <i class=${icons.copy}></i>
  </button>`;
};
16 changes: 4 additions & 12 deletions src/inspect_ai/_view/www/src/title/TitleBlock.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { html } from "htm/preact";

import { icons } from "../Constants.mjs";
import { LabeledValue } from "../components/LabeledValue.mjs";
import { formatPrettyDecimal } from "../utils/Format.mjs";
import { formatPrettyDecimal, formatDataset } from "../utils/Format.mjs";

export const TitleBlock = ({
title,
Expand Down Expand Up @@ -160,20 +160,12 @@ const DatasetSummary = ({ dataset, samples, epochs, style }) => {
return "";
}

const sampleCount = epochs > 0 ? samples.length / epochs : samples;
console

return html`
<div style=${style}>
${dataset.name}${samples?.length
? html` <span
style=${{ fontSize: "0.9em" }}
>
${dataset.name ? "— " : ""}${sampleCount + " "}${epochs > 1
? `x ${epochs} `
: ""}
${samples.length === 1 ? "sample" : "samples"}</span
>`
? html` <span style=${{ fontSize: "0.9em" }}>
${formatDataset(dataset.name, samples.length, epochs)}
</span>`
: ""}
</div>
`;
Expand Down
5 changes: 5 additions & 0 deletions src/inspect_ai/_view/www/src/utils/Format.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ export const answerForSample = (sample) => {
}
};

/**
 * Build the short dataset summary text, e.g. "— 5 x 2 samples".
 *
 * @param {string} name - Dataset name; when truthy the text is prefixed
 *   with an em dash separator.
 * @param {number} samples - Total number of samples (across all epochs).
 * @param {number} epochs - Number of epochs; when greater than zero the
 *   displayed count is `samples / epochs`, and when greater than one an
 *   "x <epochs>" multiplier is appended.
 * @returns {string} The formatted summary.
 */
export const formatDataset = (name, samples, epochs) => {
  const separator = name ? "— " : "";

  let perEpochCount = samples;
  if (epochs > 0) {
    perEpochCount = samples / epochs;
  }

  const multiplier = epochs > 1 ? `x ${epochs} ` : "";

  // Pluralization is keyed off the total sample count, not the per-epoch one.
  const noun = samples === 1 ? "sample" : "samples";

  return `${separator}${perEpochCount} ${multiplier}${noun}`;
};

export const userPromptForSample = (sample) => {
if (sample) {
if (typeof (sample.input) == "string") {
Expand Down
22 changes: 22 additions & 0 deletions src/inspect_ai/_view/www/src/workspace/SampleFilter.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { isNumeric } from "../utils/Type.mjs";
import {
kScoreTypeCategorical,
kScoreTypeNumeric,
kScoreTypeObject,
kScoreTypePassFail,
} from "./SamplesDescriptor.mjs";

Expand All @@ -21,6 +22,8 @@ export const SampleFilter = ({ descriptor, filter, filterChanged }) => {
filterFn: (sample, value) => {
if (typeof sample.score.value === "string") {
return sample.score.value.toLowerCase() === value?.toLowerCase();
} else if (typeof sample.score.value === "object") {
return JSON.stringify(sample.score.value) == value;
} else {
return sample.score.value === value;
}
Expand Down Expand Up @@ -76,6 +79,25 @@ export const SampleFilter = ({ descriptor, filter, filterChanged }) => {
`;
}

case kScoreTypeObject: {
if (!descriptor.scoreDescriptor.categories) {
return "";
}
const options = [{ text: "All", value: "all" }];
options.push(
...descriptor.scoreDescriptor.categories.map((cat) => {
return { text: cat.text, value: cat.value};
})
);



return html`<${SelectFilter}
options=${options}
filterFn=${filterCategory}
/>`;
}

default: {
return undefined;
}
Expand Down
Loading

0 comments on commit b7dac03

Please sign in to comment.