From b7dac034d1461101c314482b7dceeeed9b5293cc Mon Sep 17 00:00:00 2001 From: aisi-inspect <166920645+aisi-inspect@users.noreply.github.com> Date: Wed, 1 May 2024 17:45:16 +0000 Subject: [PATCH] sync 01-05-24 --- CHANGELOG.md | 7 ++- docs/_quarto.yml | 4 +- docs/theme.scss | 9 +++- src/inspect_ai/_eval/list.py | 54 +++++++++++-------- src/inspect_ai/_util/notebook.py | 16 ++++-- src/inspect_ai/_util/registry.py | 2 +- src/inspect_ai/_view/www/App.mjs | 38 ++++++++----- src/inspect_ai/_view/www/index.html | 2 +- src/inspect_ai/_view/www/src/Constants.mjs | 2 +- .../_view/www/src/components/CopyButton.mjs | 22 ++++++++ .../_view/www/src/title/TitleBlock.mjs | 16 ++---- src/inspect_ai/_view/www/src/utils/Format.mjs | 5 ++ .../_view/www/src/workspace/SampleFilter.mjs | 22 ++++++++ .../www/src/workspace/SamplesDescriptor.mjs | 14 +++++ .../_view/www/src/workspace/WorkSpace.mjs | 10 ++-- src/inspect_ai/scorer/_metric.py | 14 ++--- src/inspect_ai/scorer/_scorer.py | 11 ++-- src/inspect_ai/solver/_solver.py | 17 +++--- tests/test_metric.py | 8 +-- 19 files changed, 178 insertions(+), 95 deletions(-) create mode 100644 src/inspect_ai/_view/www/src/components/CopyButton.mjs diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c2b9487f..3f757e858 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,14 @@ # Changelog -## v0.3.4 (Unreleased) +## v0.3.4 (01 May 2024) - `write_eval_log()` now ignores unserializable objects in metadata fields. - `read_eval_log()` now takes a `str` or `FileInfo` (for compatibility w/ list returned from `list_eval_logs()`). +- Registry name lookups are now case sensitive (fixes issue w/ loading tasks w/ mixed case names). +- Resiliency to Python syntax errors that occur when enumerating tasks in a directory. - Do not throw error if unable to parse or load `.ipynb` file due to lack of dependencies (e.g. `nbformat`). -- Several small improvements to markdown rendering in log viewer (don't render intraword underscores, escape html tags). 
+- Various additions to log viewer display (log file name, dataset/scorer in listing, filter by complex score types). +- Improvements to markdown rendering in log viewer (don't render intraword underscores, escape html tags). ## v0.3.3 (28 April 2024) diff --git a/docs/_quarto.yml b/docs/_quarto.yml index d3f4d1e66..981f22fa7 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -12,9 +12,9 @@ book: repo-actions: [issue] downloads: [pdf, epub, docx] twitter-card: - description: "A framework for large language model evaluations" + description: "Open-source framework for large language model evaluations" open-graph: - description: "A framework for large language model evaluations" + description: "Open-source framework for large language model evaluations" sidebar: header: > [![](images/aisi-logo.png)](https://www.gov.uk/government/organisations/ai-safety-institute) diff --git a/docs/theme.scss b/docs/theme.scss index 5ab87312d..e36b65b63 100644 --- a/docs/theme.scss +++ b/docs/theme.scss @@ -38,4 +38,11 @@ .splash ul { padding-inline-start: 1rem; -} \ No newline at end of file +} + +@media(max-width: 991.98px) { + .sidebar-header-item .img-fluid { + max-width: 195px; + } +} + diff --git a/src/inspect_ai/_eval/list.py b/src/inspect_ai/_eval/list.py index ae1b1a950..aab23166f 100644 --- a/src/inspect_ai/_eval/list.py +++ b/src/inspect_ai/_eval/list.py @@ -248,17 +248,21 @@ def exec_filter(cells: list[str]) -> bool: def code_has_task(code: str) -> bool: - tree = ast.parse(code) - for node in ast.iter_child_nodes(tree): - if isinstance(node, ast.FunctionDef): - for decorator in node.decorator_list: - if isinstance(decorator, ast.Name): - if str(decorator.id) == "task": - return True - elif isinstance(decorator, ast.Call): - if isinstance(decorator.func, ast.Name): - if str(decorator.func.id) == "task": + try: + tree = ast.parse(code) + for node in ast.iter_child_nodes(tree): + if isinstance(node, ast.FunctionDef): + for decorator in node.decorator_list: + if 
isinstance(decorator, ast.Name): + if str(decorator.id) == "task": return True + elif isinstance(decorator, ast.Call): + if isinstance(decorator.func, ast.Name): + if str(decorator.func.id) == "task": + return True + except SyntaxError: + pass + return False @@ -283,20 +287,24 @@ def parse_tasks(path: Path, root_dir: Path, absolute: bool) -> list[TaskInfo]: # parse the top level tasks out of the code tasks: list[TaskInfo] = [] - tree = ast.parse(code) - for node in ast.iter_child_nodes(tree): - if isinstance(node, ast.FunctionDef): - for decorator in node.decorator_list: - result = parse_decorator(node, decorator) - if result: - name, attribs = result - tasks.append( - TaskInfo( - file=task_path(path, root_dir, absolute), - name=name, - attribs=attribs, + try: + tree = ast.parse(code) + for node in ast.iter_child_nodes(tree): + if isinstance(node, ast.FunctionDef): + for decorator in node.decorator_list: + result = parse_decorator(node, decorator) + if result: + name, attribs = result + tasks.append( + TaskInfo( + file=task_path(path, root_dir, absolute), + name=name, + attribs=attribs, + ) ) - ) + except SyntaxError: + pass + return tasks diff --git a/src/inspect_ai/_util/notebook.py b/src/inspect_ai/_util/notebook.py index 9deea38bf..2a9305e85 100644 --- a/src/inspect_ai/_util/notebook.py +++ b/src/inspect_ai/_util/notebook.py @@ -6,7 +6,8 @@ from IPython import get_ipython # type: ignore from IPython.core.interactiveshell import InteractiveShell -from nbformat import read +from nbformat import NBFormatError, ValidationError, read +from nbformat.reader import NotJSONError # from https://jupyter-notebook.readthedocs.io/en/stable/examples/Notebook/Importing%20Notebooks.html @@ -64,9 +65,16 @@ def load_module(self, fullname: str) -> types.ModuleType: def read_notebook_code(path: Path) -> str: - # load the notebook object - with io.open(path, "r", encoding="utf-8") as f: - nb = read(f, 4) # type: ignore + try: + # load the notebook object + with io.open(path, "r", 
encoding="utf-8") as f: + nb = read(f, 4) # type: ignore + except NotJSONError: + return "" + except ValidationError: + return "" + except NBFormatError: + return "" # for dealing w/ magics shell = InteractiveShell.instance() diff --git a/src/inspect_ai/_util/registry.py b/src/inspect_ai/_util/registry.py index 0ad6aeae7..6d2c3ea36 100644 --- a/src/inspect_ai/_util/registry.py +++ b/src/inspect_ai/_util/registry.py @@ -93,7 +93,7 @@ def registry_name(o: object, name: str) -> str: and if it is, preprends the package name as a namespace """ package = get_package_name(o) - return (f"{package}/{name}" if package else name).lower() + return f"{package}/{name}" if package else name def registry_lookup(type: RegistryType, name: str) -> object | None: diff --git a/src/inspect_ai/_view/www/App.mjs b/src/inspect_ai/_view/www/App.mjs index e7e6a4ca2..3aa285421 100644 --- a/src/inspect_ai/_view/www/App.mjs +++ b/src/inspect_ai/_view/www/App.mjs @@ -10,6 +10,7 @@ import "./src/Register.mjs"; import { icons } from "./src/Constants.mjs"; import { WorkSpace } from "./src/workspace/WorkSpace.mjs"; import { eval_log } from "./api.mjs"; +import { CopyButton } from "./src/components/CopyButton.mjs"; export function App() { const [selected, setSelected] = useState(0); @@ -89,7 +90,7 @@ export function App() { const appEnvelope = fullScreen ? "" : html` - <${Header} logs=${logs} offcanvas=${offcanvas} /> + <${Header} logs=${logs} selected=${selected} offcanvas=${offcanvas} /> <${Sidebar} logs=${logs} logHeaders=${logHeaders} @@ -123,6 +124,14 @@ export function App() { const Header = (props) => { const toggleOffCanClass = props.offcanvas ? "" : " d-md-none"; const gearOffCanClass = props.offcanvas ? "" : " d-md-inline"; + + const logFiles = props.logs.files || []; + const logSelected = props.selected || 0; + const logUri = logFiles.length > logSelected ? 
logFiles[logSelected].name : ""; + const logName =logUri.split('/').pop(); + + + return html` `; @@ -218,6 +227,9 @@ const Sidebar = (props) => { ...logHeader.eval?.task_args, } : undefined; + const model = logHeader?.eval?.model; + const dataset = logHeader?.eval?.dataset; + const scorer = logHeader?.results?.scorer?.name; return html`