release v0.3.7
aisi-inspect committed May 7, 2024
1 parent 2a0b6b9 commit 8406a69
Showing 116 changed files with 15,485 additions and 712 deletions.
34 changes: 34 additions & 0 deletions .github/workflows/vscode.yml
@@ -0,0 +1,34 @@
on:
  push:
    tags:
      - "v[0-9]*"
    branches:
      - "main"
  pull_request:
    branches:
      - "main"
  workflow_dispatch:

name: Deploy Extension
jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: "18.x"
      - run: |
          pushd tools/vscode
          yarn install --immutable --immutable-cache --check-cache
      - name: Build Extension
        run: |
          pushd tools/vscode
          yarn vsce package
      - name: Upload extension to Actions Artifact
        uses: actions/upload-artifact@v4
        with:
          name: inspect-vscode
          path: "tools/vscode/inspect*.vsix"
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,11 @@
# Changelog

## v0.3.7 (07 May 2024)

- Add support for logprobs to the HF provider, and create a uniform API for other providers that support logprobs (Together and OpenAI).
- Provide an option to merge assistant messages and use it for Anthropic models (as they don't allow consecutive assistant messages).
- Add supporting infrastructure in the Inspect CLI for the VS Code extension (additional list and info commands).

## v0.3.6 (06 May 2024)

- Show first log file immediately (don't wait for fetching metadata for other logs)
281 changes: 2 additions & 279 deletions benchmarks/mmlu.py
@@ -14,11 +14,9 @@
inspect eval mmlu.py@mmlu --limit 500 -T cot=true
# eval selected subjects
inspect eval mmlu.py@mmlu -T subjects=anatomy
inspect eval mmlu.py@mmlu -T subjects=astronomy
inspect eval mmlu.py@mmlu -T subjects=anatomy,astronomy
# eval single subjects
inspect eval mmlu.py@mmlu_anatomy
inspect eval mmlu.py@mmlu_astronomy
"""

from inspect_ai import Task, task
@@ -71,278 +69,3 @@ def mmlu(subjects=[], cot=False):
scorer=answer("letter"),
config=GenerateConfig(temperature=0.5),
)


@task
def mmlu_abstract_algebra(cot=False):
return mmlu("abstract_algebra", cot)


@task
def mmlu_anatomy(cot=False):
return mmlu("anatomy", cot)


@task
def mmlu_astronomy(cot=False):
return mmlu("astronomy", cot)


@task
def mmlu_business_ethics(cot=False):
return mmlu("business_ethics", cot)


@task
def mmlu_clinical_knowledge(cot=False):
return mmlu("clinical_knowledge", cot)


@task
def mmlu_college_biology(cot=False):
return mmlu("college_biology", cot)


@task
def mmlu_college_chemistry(cot=False):
return mmlu("college_chemistry", cot)


@task
def mmlu_college_computer_science(cot=False):
return mmlu("college_computer_science", cot)


@task
def mmlu_college_mathematics(cot=False):
return mmlu("college_mathematics", cot)


@task
def mmlu_college_medicine(cot=False):
return mmlu("college_medicine", cot)


@task
def mmlu_college_physics(cot=False):
return mmlu("college_physics", cot)


@task
def mmlu_computer_security(cot=False):
return mmlu("computer_security", cot)


@task
def mmlu_conceptual_physics(cot=False):
return mmlu("conceptual_physics", cot)


@task
def mmlu_electrical_engineering(cot=False):
return mmlu("electrical_engineering", cot)


@task
def mmlu_elementary_mathematics(cot=False):
return mmlu("elementary_mathematics", cot)


@task
def mmlu_formal_logic(cot=False):
return mmlu("formal_logic", cot)


@task
def mmlu_global_facts(cot=False):
return mmlu("global_facts", cot)


@task
def mmlu_high_school_biology(cot=False):
return mmlu("high_school_biology", cot)


@task
def mmlu_high_school_chemistry(cot=False):
return mmlu("high_school_chemistry", cot)


@task
def mmlu_high_school_computer_science(cot=False):
return mmlu("high_school_computer_science", cot)


@task
def mmlu_high_school_european_history(cot=False):
return mmlu("high_school_european_history", cot)


@task
def mmlu_high_school_geography(cot=False):
return mmlu("high_school_geography", cot)


@task
def mmlu_high_school_government_and_politics(cot=False):
return mmlu("high_school_government_and_politics", cot)


@task
def mmlu_high_school_macroeconomics(cot=False):
return mmlu("high_school_macroeconomics", cot)


@task
def mmlu_high_school_mathematics(cot=False):
return mmlu("high_school_mathematics", cot)


@task
def mmlu_high_school_microeconomics(cot=False):
return mmlu("high_school_microeconomics", cot)


@task
def mmlu_high_school_physics(cot=False):
return mmlu("high_school_physics", cot)


@task
def mmlu_high_school_psychology(cot=False):
return mmlu("high_school_psychology", cot)


@task
def mmlu_high_school_statistics(cot=False):
return mmlu("high_school_statistics", cot)


@task
def mmlu_high_school_us_history(cot=False):
return mmlu("high_school_us_history", cot)


@task
def mmlu_high_school_world_history(cot=False):
return mmlu("high_school_world_history", cot)


@task
def mmlu_human_aging(cot=False):
return mmlu("human_aging", cot)


@task
def mmlu_human_sexuality(cot=False):
return mmlu("human_sexuality", cot)


@task
def mmlu_international_law(cot=False):
return mmlu("international_law", cot)


@task
def mmlu_jurisprudence(cot=False):
return mmlu("jurisprudence", cot)


@task
def mmlu_logical_fallacies(cot=False):
return mmlu("logical_fallacies", cot)


@task
def mmlu_machine_learning(cot=False):
return mmlu("machine_learning", cot)


@task
def mmlu_management(cot=False):
return mmlu("management", cot)


@task
def mmlu_marketing(cot=False):
return mmlu("marketing", cot)


@task
def mmlu_miscellaneous(cot=False):
return mmlu("miscellaneous", cot)


@task
def mmlu_moral_disputes(cot=False):
return mmlu("moral_disputes", cot)


@task
def mmlu_moral_scenarios(cot=False):
return mmlu("moral_scenarios", cot)


@task
def mmlu_nutrition(cot=False):
return mmlu("nutrition", cot)


@task
def mmlu_philosophy(cot=False):
return mmlu("philosophy", cot)


@task
def mmlu_prehistory(cot=False):
return mmlu("prehistory", cot)


@task
def mmlu_professional_accounting(cot=False):
return mmlu("professional_accounting", cot)


@task
def mmlu_professional_law(cot=False):
return mmlu("professional_law", cot)


@task
def mmlu_professional_medicine(cot=False):
return mmlu("professional_medicine", cot)


@task
def mmlu_professional_psychology(cot=False):
return mmlu("professional_psychology", cot)


@task
def mmlu_public_relations(cot=False):
return mmlu("public_relations", cot)


@task
def mmlu_security_studies(cot=False):
return mmlu("security_studies", cot)


@task
def mmlu_sociology(cot=False):
return mmlu("sociology", cot)


@task
def mmlu_us_foreign_policy(cot=False):
return mmlu("us_foreign_policy", cot)


@task
def mmlu_virology(cot=False):
return mmlu("virology", cot)


@task
def mmlu_world_religions(cot=False):
return mmlu("world_religions", cot)
4 changes: 2 additions & 2 deletions src/inspect_ai/_cli/eval.py
@@ -156,12 +156,12 @@
"--logprobs",
type=bool,
is_flag=True,
help="Return log probabilities of the output tokens. OpenAI and TogetherAI only.",
help="Return log probabilities of the output tokens. OpenAI, TogetherAI, and Huggingface only.",
)
@click.option(
"--top-logprobs",
type=int,
help="Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI only.",
help="Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI and Huggingface only.",
)
@common_options
def eval_command(
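A rough sketch of how the updated flags could be combined on the command line; the model name is only an illustrative placeholder, and the --model syntax is assumed from the wider Inspect CLI rather than shown in this hunk:

# request log probabilities of output tokens from a supporting provider,
# plus the 5 most likely alternatives at each token position
inspect eval mmlu.py@mmlu --model hf/EleutherAI/pythia-1b --logprobs --top-logprobs 5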
11 changes: 10 additions & 1 deletion src/inspect_ai/_cli/info.py
@@ -1,10 +1,11 @@
from json import dumps

import click
from pydantic_core import to_jsonable_python

from inspect_ai import __version__
from inspect_ai._util.constants import PKG_PATH
from inspect_ai.log import eval_log_json, read_eval_log
from inspect_ai.log._file import eval_log_json, read_eval_log, read_eval_log_headers


@click.group("info")
@@ -44,6 +45,14 @@ def log(path: str, header_only: bool) -> None:
print(eval_log_json(log))


@info_command.command("log-file-headers")
@click.argument("files", nargs=-1)
def log_file_headers(files: tuple[str]) -> None:
"""Read and print a JSON list of log file headers."""
headers = read_eval_log_headers(list(files))
print(dumps(to_jsonable_python(headers), indent=2))


@info_command.command("log-schema")
def log_schema() -> None:
"""Print JSON schema for log files."""
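A possible invocation of the new log-file-headers command added above, with hypothetical log file paths:

# print a JSON list of headers for the given log files
inspect info log-file-headers ./logs/run1.json ./logs/run2.json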