diff --git a/docs/scorers.qmd b/docs/scorers.qmd index 8f88b5c2c..abddfa6c0 100644 --- a/docs/scorers.qmd +++ b/docs/scorers.qmd @@ -257,6 +257,14 @@ Note also we use the `input_text` property of the `TaskState` to access a string ## Multiple Scorers {#sec-multiple-scorers} +::: {.callout-note appearance="simple"} +The multiple scorers feature described below is available only in the development version of Inspect (it is not yet published to PyPI). You can install the development version with: + +```bash +$ pip install git+https://github.com/UKGovernmentBEIS/inspect_ai +``` +::: + There are several ways to use multiple scorers in an evaluation: 1. You can provide a list of scorers in a `Task` definition (this is the best option when scorers are entirely independent) diff --git a/src/inspect_ai/_eval/context.py b/src/inspect_ai/_eval/context.py index c7e149f1f..9bdd52cd4 100644 --- a/src/inspect_ai/_eval/context.py +++ b/src/inspect_ai/_eval/context.py @@ -1,4 +1,4 @@ -from inspect_ai._util.telemetry import init_telemetry +from inspect_ai._util.hooks import init_hooks from inspect_ai.model import Model from inspect_ai.model._model import init_active_model, init_model_usage from inspect_ai.util._concurrency import init_concurrency @@ -9,7 +9,7 @@ def init_eval_context(max_subprocesses: int | None = None) -> None: init_concurrency() init_max_subprocesses(max_subprocesses) - init_telemetry() + init_hooks() def init_task_context(model: Model) -> None: diff --git a/src/inspect_ai/_eval/task/run.py b/src/inspect_ai/_eval/task/run.py index 72ed93937..f4bb4a0f3 100644 --- a/src/inspect_ai/_eval/task/run.py +++ b/src/inspect_ai/_eval/task/run.py @@ -20,11 +20,11 @@ from inspect_ai._util.datetime import iso_now from inspect_ai._util.error import exception_message from inspect_ai._util.file import file, filesystem +from inspect_ai._util.hooks import send_telemetry from inspect_ai._util.registry import ( is_registry_object, registry_log_name, ) -from inspect_ai._util.telemetry 
import send_telemetry from inspect_ai._util.url import data_uri_to_base64, is_data_uri from inspect_ai._view.view import view_notify_eval from inspect_ai.dataset import Dataset, Sample diff --git a/src/inspect_ai/_util/hooks.py b/src/inspect_ai/_util/hooks.py new file mode 100644 index 000000000..6ada8df74 --- /dev/null +++ b/src/inspect_ai/_util/hooks.py @@ -0,0 +1,118 @@ +import importlib +import os +from typing import Any, Awaitable, Callable, Literal, cast + +from rich import print + +from .constants import PKG_NAME +from .error import PrerequisiteError + +# Hooks are functions inside packages that are installed with an +# environment variable (e.g. INSPECT_TELEMETRY='mypackage.send_telemetry') +# If one or more hooks are enabled a message will be printed at startup +# indicating this, as well as which package/function implements each hook + + +# Telemetry (INSPECT_TELEMETRY) +# +# Telemetry can be optionally enabled by setting an INSPECT_TELEMETRY +# environment variable that points to a function in a package which +# conforms to the TelemetrySend signature below. + +# There are currently two types of telemetry sent: +# - model_usage (type ModelUsage) +# - eval_log (type EvalLog) + +TelemetrySend = Callable[[str, str], Awaitable[None]] + + +async def send_telemetry(type: Literal["model_usage", "eval_log"], json: str) -> None: + global _send_telemetry + if _send_telemetry: + await _send_telemetry(type, json) + + +_send_telemetry: TelemetrySend | None = None + +# API Key Override (INSPECT_API_KEY_OVERRIDE) +# +# API Key overrides can be optionally enabled by setting an +# INSPECT_API_KEY_OVERRIDE environment variable which conforms to the +# ApiKeyOverride signature below. +# +# The api key override function will be called with the name and value +# of provider specified environment variables that contain api keys, +# and it can optionally return an override value. 
+ +ApiKeyOverride = Callable[[str, str], str | None] + + +def override_api_key(var: str, value: str) -> str | None: + global _override_api_key + if _override_api_key: + return _override_api_key(var, value) + else: + return None + + +_override_api_key: ApiKeyOverride | None = None + + +def init_hooks() -> None: + # messages we'll print for hooks if we have them + messages: list[str] = [] + + # telemetry + global _send_telemetry + if not _send_telemetry: + result = init_hook( + "telemetry", + "INSPECT_TELEMETRY", + "(eval logs and token usage will be recorded by the provider)", + ) + if result: + _send_telemetry, message = result + messages.append(message) + + # api key override + global _override_api_key + if not _override_api_key: + result = init_hook( + "api key override", + "INSPECT_API_KEY_OVERRIDE", + "(api keys will be read and modified by the provider)", + ) + if result: + _override_api_key, message = result + messages.append(message) + + # if any hooks are enabled, let the user know + if len(messages) > 0: + version = importlib.metadata.version(PKG_NAME) + all_messages = "\n".join([f"- {message}" for message in messages]) + print( + f"[blue][bold]inspect_ai v{version}[/bold][/blue]\n[bright_black]{all_messages}[/bright_black]\n" + ) + + +def init_hook( + name: str, env: str, message: str +) -> tuple[Callable[..., Any], str] | None: + hook = os.environ.get(env, "") + if hook: + # parse module/function + module_name, function_name = hook.strip().rsplit(".", 1) + # load (fail gracefully w/ clear error) + try: + module = importlib.import_module(module_name) + return ( + cast(Callable[..., Any], getattr(module, function_name)), + f"[bold]{name} enabled: {hook}[/bold]\n {message}", + ) + except (AttributeError, ModuleNotFoundError): + raise PrerequisiteError( + f"{env} provider not found: {hook}\n" + + "Please correct (or undefine) this environment variable before proceeding.\n" + ) + else: + return None diff --git a/src/inspect_ai/_util/telemetry.py 
b/src/inspect_ai/_util/telemetry.py deleted file mode 100644 index 565a96aa5..000000000 --- a/src/inspect_ai/_util/telemetry.py +++ /dev/null @@ -1,53 +0,0 @@ -import importlib -import os -from typing import Awaitable, Callable, Literal, cast - -from rich import print - -from .constants import PKG_NAME -from .error import PrerequisiteError - -# Telemetry can be optionally enabled by setting an INSPECT_TELEMETRY -# environment variable that points to a function in a package which -# conforms to the TelemetrySend signature below. For example, -# 'mypackage.inspect.send_telemetry'. When telemetry is enabled a -# message will be printed at startup indicating this, as well as which -# package is registered for telemetry. -# -# There are currently two types of telemetry sent: -# - model_usage (type ModelUsage) -# - eval_log (type EvalLog) - -TelemetrySend = Callable[[str, str], Awaitable[None]] - - -def init_telemetry() -> None: - global _send_telemetry - if not _send_telemetry: - telemetry = os.environ.get("INSPECT_TELEMETRY", "") - if telemetry: - # parse module/function - module_name, function_name = telemetry.strip().rsplit(".", 1) - # load (fail gracefully w/ clear error) - try: - module = importlib.import_module(module_name) - _send_telemetry = cast(TelemetrySend, getattr(module, function_name)) - except (AttributeError, ModuleNotFoundError): - raise PrerequisiteError( - f"INSPECT_TELEMETRY provider not found: {telemetry}\n" - + "Please correct (or undefine) this environment variable before proceeding.\n" - ) - # let the user know that telemetry is enabled - version = importlib.metadata.version(PKG_NAME) - print( - f"[blue][bold]inspect_ai v{version}\ntelemetry enabled: {telemetry}\n(eval logs and token usage will be recorded by provider)[/bold][/blue]\n" - ) - - -async def send_telemetry(type: Literal["model_usage", "eval_log"], json: str) -> None: - global _send_telemetry - if _send_telemetry: - await _send_telemetry(type, json) - - -_send_telemetry: 
TelemetrySend | None = None diff --git a/src/inspect_ai/model/_model.py b/src/inspect_ai/model/_model.py index 65634b38c..e16ea264c 100644 --- a/src/inspect_ai/model/_model.py +++ b/src/inspect_ai/model/_model.py @@ -20,6 +20,7 @@ from inspect_ai._util.constants import DEFAULT_MAX_CONNECTIONS from inspect_ai._util.content import Content, ContentText from inspect_ai._util.entrypoints import ensure_entry_points +from inspect_ai._util.hooks import init_hooks, override_api_key, send_telemetry from inspect_ai._util.platform import platform_init from inspect_ai._util.registry import ( RegistryInfo, @@ -28,7 +29,6 @@ registry_unqualified_name, ) from inspect_ai._util.retry import log_rate_limit_retry -from inspect_ai._util.telemetry import init_telemetry, send_telemetry from inspect_ai.tool import Tool, ToolChoice, ToolFunction, ToolInfo from inspect_ai.util import concurrency @@ -54,6 +54,7 @@ def __init__( model_name: str, base_url: str | None = None, api_key: str | None = None, + api_key_vars: list[str] = [], config: GenerateConfig = GenerateConfig(), ) -> None: """Create a model API provider. @@ -62,13 +63,34 @@ def __init__( model_name (str): Model name. base_url (str | None): Alternate base URL for model. api_key (str | None): API key for model. + api_key_vars (list[str]): Environment variables that + may contain keys for this provider (used for override) config (GenerateConfig): Model configuration. 
""" self.model_name = model_name self.base_url = base_url - self.api_key = api_key self.config = config + # apply api key override + for key in api_key_vars: + # if there is an explicit api_key passed then it + # overrides anything in the environment so use it + if api_key is not None: + override = override_api_key(key, api_key) + if override is not None: + api_key = override + # otherwise look it up in the environment and + # override it if it has a value + else: + value = os.environ.get(key, None) + if value is not None: + override = override_api_key(key, value) + if override is not None: + os.environ[key] = override + + # set any explicitly specified api key + self.api_key = api_key + @abc.abstractmethod async def generate( self, @@ -459,9 +481,9 @@ def match_modelapi_type(info: RegistryInfo) -> bool: # find a matching model type modelapi_types = registry_find(match_modelapi_type) if len(modelapi_types) > 0: - # create the model (init_telemetry here in case the model api + # create the model (init_hooks here in case the model api # is being used as a stadalone model interface outside of evals) - init_telemetry() + init_hooks() modelapi_type = cast(type[ModelAPI], modelapi_types[0]) modelapi_instance = modelapi_type( model_name=model, diff --git a/src/inspect_ai/model/_providers/anthropic.py b/src/inspect_ai/model/_providers/anthropic.py index 42f19d984..379e5819e 100644 --- a/src/inspect_ai/model/_providers/anthropic.py +++ b/src/inspect_ai/model/_providers/anthropic.py @@ -65,7 +65,11 @@ def __init__( **model_args: Any, ): super().__init__( - model_name=model_name, base_url=base_url, api_key=api_key, config=config + model_name=model_name, + base_url=base_url, + api_key=api_key, + api_key_vars=[ANTHROPIC_API_KEY], + config=config, ) # create client diff --git a/src/inspect_ai/model/_providers/azureai.py b/src/inspect_ai/model/_providers/azureai.py index fb56393c2..da2714b1c 100644 --- a/src/inspect_ai/model/_providers/azureai.py +++ 
b/src/inspect_ai/model/_providers/azureai.py @@ -46,7 +46,11 @@ def __init__( **model_args: Any, ): super().__init__( - model_name=model_name, base_url=base_url, api_key=api_key, config=config + model_name=model_name, + base_url=base_url, + api_key=api_key, + api_key_vars=[AZURE_API_KEY], + config=config, ) # required for some deployments diff --git a/src/inspect_ai/model/_providers/bedrock.py b/src/inspect_ai/model/_providers/bedrock.py index a75b04152..6f0e072c8 100644 --- a/src/inspect_ai/model/_providers/bedrock.py +++ b/src/inspect_ai/model/_providers/bedrock.py @@ -24,6 +24,7 @@ from .._generate_config import GenerateConfig from .._model import ModelAPI, simple_input_messages from .._model_output import ChatCompletionChoice, ModelOutput, ModelUsage +from .anthropic import ANTHROPIC_API_KEY from .util import as_stop_reason, model_base_url @@ -35,7 +36,12 @@ def __init__( config: GenerateConfig = GenerateConfig(), **model_args: Any, ): - super().__init__(model_name=model_name, base_url=base_url, config=config) + super().__init__( + model_name=model_name, + base_url=base_url, + api_key_vars=[ANTHROPIC_API_KEY], + config=config, + ) # we can optionally proxy to another ModelAPI self.model_api: ModelAPI | None = None diff --git a/src/inspect_ai/model/_providers/cloudflare.py b/src/inspect_ai/model/_providers/cloudflare.py index eaadfc434..f3c971f89 100644 --- a/src/inspect_ai/model/_providers/cloudflare.py +++ b/src/inspect_ai/model/_providers/cloudflare.py @@ -19,6 +19,9 @@ # https://developers.cloudflare.com/workers-ai/models/#text-generation +CLOUDFLARE_API_TOKEN = "CLOUDFLARE_API_TOKEN" + + class CloudFlareAPI(ModelAPI): def __init__( self, @@ -29,15 +32,21 @@ def __init__( **model_args: Any, ): super().__init__( - model_name=model_name, base_url=base_url, api_key=api_key, config=config + model_name=model_name, + base_url=base_url, + api_key=api_key, + api_key_vars=[CLOUDFLARE_API_TOKEN], + config=config, ) self.account_id = os.getenv("CLOUDFLARE_ACCOUNT_ID") 
if not self.account_id: raise RuntimeError("CLOUDFLARE_ACCOUNT_ID environment variable not set") if not self.api_key: - self.api_key = os.getenv("CLOUDFLARE_API_TOKEN") + self.api_key = os.getenv(CLOUDFLARE_API_TOKEN) if not self.api_key: - raise RuntimeError("CLOUDFLARE_API_TOKEN environment variable not set") + raise RuntimeError( + f"{CLOUDFLARE_API_TOKEN} environment variable not set" + ) self.client = httpx.AsyncClient() base_url = model_base_url(base_url, "CLOUDFLARE_BASE_URL") self.base_url = ( diff --git a/src/inspect_ai/model/_providers/google.py b/src/inspect_ai/model/_providers/google.py index 27cb1f815..c38485cc7 100644 --- a/src/inspect_ai/model/_providers/google.py +++ b/src/inspect_ai/model/_providers/google.py @@ -59,6 +59,8 @@ HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE, } +GOOGLE_API_KEY = "GOOGLE_API_KEY" + class GoogleAPI(ModelAPI): def __init__( @@ -70,7 +72,11 @@ def __init__( **model_args: Any, ) -> None: super().__init__( - model_name=model_name, base_url=base_url, api_key=api_key, config=config + model_name=model_name, + base_url=base_url, + api_key=api_key, + api_key_vars=[GOOGLE_API_KEY], + config=config, ) # configure genai client diff --git a/src/inspect_ai/model/_providers/hf.py b/src/inspect_ai/model/_providers/hf.py index 7b779808f..ebc627b1d 100644 --- a/src/inspect_ai/model/_providers/hf.py +++ b/src/inspect_ai/model/_providers/hf.py @@ -33,6 +33,8 @@ ) from .._util import chat_api_input +HF_TOKEN = "HF_TOKEN" + class HuggingFaceAPI(ModelAPI): def __init__( @@ -44,7 +46,11 @@ def __init__( **model_args: Any, ): super().__init__( - model_name=model_name, base_url=base_url, api_key=api_key, config=config + model_name=model_name, + base_url=base_url, + api_key=api_key, + api_key_vars=[HF_TOKEN], + config=config, ) # set random seeds diff --git a/src/inspect_ai/model/_providers/mistral.py b/src/inspect_ai/model/_providers/mistral.py index 29a8a0fad..4d58db9f5 100644 --- 
a/src/inspect_ai/model/_providers/mistral.py +++ b/src/inspect_ai/model/_providers/mistral.py @@ -56,7 +56,15 @@ def __init__( **model_args: Any, ): super().__init__( - model_name=model_name, base_url=base_url, api_key=api_key, config=config + model_name=model_name, + base_url=base_url, + api_key=api_key, + api_key_vars=[ + MISTRAL_API_KEY, + AZURE_MISTRAL_API_KEY, + AZUREAI_MISTRAL_API_KEY, + ], + config=config, ) # resolve api_key -- look for mistral then azure diff --git a/src/inspect_ai/model/_providers/mockllm.py b/src/inspect_ai/model/_providers/mockllm.py index 41bd39349..8a34227b0 100644 --- a/src/inspect_ai/model/_providers/mockllm.py +++ b/src/inspect_ai/model/_providers/mockllm.py @@ -30,7 +30,7 @@ def __init__( custom_outputs: Iterable[ModelOutput] = [], **model_args: dict[str, Any], ) -> None: - super().__init__(model_name, base_url, api_key, config) + super().__init__(model_name, base_url, api_key, [], config) self.model_args = model_args if model_name != "model": raise ValueError(f"Invalid model name: {model_name}") diff --git a/src/inspect_ai/model/_providers/openai.py b/src/inspect_ai/model/_providers/openai.py index 32b9dd24c..896893e73 100644 --- a/src/inspect_ai/model/_providers/openai.py +++ b/src/inspect_ai/model/_providers/openai.py @@ -61,7 +61,11 @@ def __init__( ) -> None: # call super super().__init__( - model_name=model_name, base_url=base_url, api_key=api_key, config=config + model_name=model_name, + base_url=base_url, + api_key=api_key, + api_key_vars=[OPENAI_API_KEY, AZURE_OPENAI_API_KEY, AZUREAI_OPENAI_API_KEY], + config=config, ) # resolve api_key diff --git a/tests/test_package/inspect_package/modelapi/custom.py b/tests/test_package/inspect_package/modelapi/custom.py index 3152bd8e0..70a22b872 100644 --- a/tests/test_package/inspect_package/modelapi/custom.py +++ b/tests/test_package/inspect_package/modelapi/custom.py @@ -15,7 +15,7 @@ def __init__( api_key: str | None = None, config: GenerateConfig = GenerateConfig(), ) -> None: - 
super().__init__(model_name, base_url, api_key, config) + super().__init__(model_name, base_url, api_key, [], config) async def generate( self, diff --git a/tools/vscode/CHANGELOG.md b/tools/vscode/CHANGELOG.md index 81581dbc5..ca69ffd14 100644 --- a/tools/vscode/CHANGELOG.md +++ b/tools/vscode/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 0.3.24 + +- Properly deal with URI encoded characters in log directories when opening a log file + ## 0.3.23 - Ensure the log view only opens in the correct window when debugging a task diff --git a/tools/vscode/package.json b/tools/vscode/package.json index 6748f2592..e84cd50a7 100644 --- a/tools/vscode/package.json +++ b/tools/vscode/package.json @@ -7,7 +7,7 @@ "author": { "name": "UK AI Safety Institute" }, - "version": "0.3.23", + "version": "0.3.24", "license": "MIT", "homepage": "https://ukgovernmentbeis.github.io/inspect_ai/", "repository": { diff --git a/tools/vscode/src/inspect/logs.ts b/tools/vscode/src/inspect/logs.ts index 014b9c426..3b3d577ef 100644 --- a/tools/vscode/src/inspect/logs.ts +++ b/tools/vscode/src/inspect/logs.ts @@ -13,7 +13,7 @@ export function inspectEvalLogs(cwd: AbsolutePath, log_dir?: Uri): string | unde const cmdArgs = ["list", "logs", "--json"]; if (log_dir) { cmdArgs.push("--log-dir"); - cmdArgs.push(log_dir.toString()); + cmdArgs.push(log_dir.toString(true)); } const output = runProcess(inspectBin, cmdArgs, cwd); return output; diff --git a/tools/vscode/src/providers/workspace/workspace-env-provider.ts b/tools/vscode/src/providers/workspace/workspace-env-provider.ts index 3ee8cf47b..8fa490188 100644 --- a/tools/vscode/src/providers/workspace/workspace-env-provider.ts +++ b/tools/vscode/src/providers/workspace/workspace-env-provider.ts @@ -10,7 +10,7 @@ import { isEqual } from "lodash"; import { workspaceEnvCommands } from "./workspace-env-commands"; import { activeWorkspaceFolder } from "../../core/workspace"; import { log } from "../../core/log"; -import { statSync } from "fs"; +import { 
existsSync, statSync } from "fs"; import { toAbsolutePath, workspaceRelativePath } from "../../core/path"; export function activateWorkspaceEnv(): [Command[], WorkspaceEnvManager] { @@ -31,14 +31,16 @@ export class WorkspaceEnvManager implements Disposable { const envRelativePath = workspaceRelativePath(toAbsolutePath(envUri.fsPath)); log.appendLine(`Watching ${envRelativePath}`); this.envWatcher_ = setInterval(() => { - const envUpdated = statSync(envUri.fsPath).mtime.getTime(); - if (envUpdated > this.lastUpdated_) { - this.lastUpdated_ = envUpdated; - const newEnv = readEnv(envUri); - if (!isEqual(this.env, newEnv)) { - log.appendLine(`${envRelativePath} changed`); - this.env = newEnv; - this.onEnvironmentChanged_.fire({}); + if (existsSync(envUri.fsPath)) { + const envUpdated = statSync(envUri.fsPath).mtime.getTime(); + if (envUpdated > this.lastUpdated_) { + this.lastUpdated_ = envUpdated; + const newEnv = readEnv(envUri); + if (!isEqual(this.env, newEnv)) { + log.appendLine(`${envRelativePath} changed`); + this.env = newEnv; + this.onEnvironmentChanged_.fire({}); + } } } }, 1000);