Merge branch 'UKGovernmentBEIS:main' into main

UKGovernmentBEIS · Jan 9, 2025 · 3617f14
2 parents a748c5c + c5dc3e0
commit 3617f14
Show file tree

Hide file tree

Showing 14 changed files with 58 additions and 61 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -90,4 +90,4 @@ jobs:
       - name: Delete knowingly duplicated files
         run: rm src/inspect_ai/_view/www/favicon.svg
 
-      - uses: hynek/build-and-inspect-python-package@v1
+      - uses: hynek/build-and-inspect-python-package@v2
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -4,15 +4,15 @@
 default_language_version:
   python: python3.11
 repos:
-- repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.8.6
-  hooks:
-    # Run the linter.
-    - id: ruff
-      args: [ --fix ]
-    # Run the formatter.
-    - id: ruff-format
--   repo: https://github.com/pre-commit/pre-commit-hooks
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.9.0
+    hooks:
+      # Run the linter.
+      - id: ruff
+        args: [--fix]
+      # Run the formatter.
+      - id: ruff-format
+  - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.5.0
     hooks:
       - id: check-added-large-files

diff --git a/pyproject.toml b/pyproject.toml
@@ -129,7 +129,7 @@ dev = [
     "pytest-cov",
     "pytest-dotenv",
     "pytest-xdist",
-    "ruff==0.8.6", # match version specified in .pre-commit-config.yaml
+    "ruff==0.9.0", # match version specified in .pre-commit-config.yaml
     "textual-dev>=0.86.2",
     "types-PyYAML",
     "types-beautifulsoup4",

diff --git a/src/inspect_ai/_display/core/panel.py b/src/inspect_ai/_display/core/panel.py
@@ -112,7 +112,7 @@ def tasks_title(completed: int, total: int) -> str:
 def task_title(profile: TaskProfile, show_model: bool) -> str:
     eval_epochs = profile.eval_config.epochs or 1
     epochs = f" x {profile.eval_config.epochs}" if eval_epochs > 1 else ""
-    samples = f"{profile.samples//eval_epochs:,}{epochs} sample{'s' if profile.samples != 1 else ''}"
+    samples = f"{profile.samples // eval_epochs:,}{epochs} sample{'s' if profile.samples != 1 else ''}"
     title = f"{registry_unqualified_name(profile.name)} ({samples})"
     if show_model:
         title = f"{title}: {profile.model}"

diff --git a/src/inspect_ai/_util/datetime.py b/src/inspect_ai/_util/datetime.py
@@ -4,7 +4,7 @@
 
 def iso_now(
     timespec: Literal[
-        "auto", "hours", "minutes", "seconds", "milliseconds" "microseconds"
+        "auto", "hours", "minutes", "seconds", "milliseconds", "microseconds"
     ] = "seconds",
 ) -> str:
     return datetime.now().astimezone().isoformat(timespec=timespec)
diff --git a/src/inspect_ai/_util/deprecation.py b/src/inspect_ai/_util/deprecation.py
@@ -174,7 +174,7 @@ def default_deprecation_msg(
 
         _qual = getattr(obj, "__qualname__", "") or ""
         if _qual.endswith(".__init__") or _qual.endswith(".__new__"):
-            _obj = f' class ({_qual.rsplit(".", 1)[0]})'
+            _obj = f" class ({_qual.rsplit('.', 1)[0]})"
         elif _qual and _obj:
             _obj += f" ({_qual})"
 

diff --git a/src/inspect_ai/_util/logger.py b/src/inspect_ai/_util/logger.py
@@ -1,5 +1,6 @@
 import atexit
 import os
+import re
 from logging import (
     DEBUG,
     INFO,
@@ -182,7 +183,7 @@ def notify_logger_record(record: LogRecord, write: bool) -> None:
     if write:
         transcript()._event(LoggerEvent(message=LoggingMessage.from_log_record(record)))
     global _rate_limit_count
-    if (record.levelno <= INFO and "429" in record.getMessage()) or (
+    if (record.levelno <= INFO and re.search(r"\b429\b", record.getMessage())) or (
         record.levelno == DEBUG
         # See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html#validating-retry-attempts
         # for boto retry logic / log messages (this is tracking standard or adapative retries)

diff --git a/src/inspect_ai/_util/transcript.py b/src/inspect_ai/_util/transcript.py
@@ -120,12 +120,10 @@ def transcript_function(function: str, arguments: dict[str, Any]) -> RenderableT
     return transcript_markdown("```python\n" + call + "\n```\n")
 
 
-DOUBLE_LINE = Box(
-    " ══ \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n"
-)
+DOUBLE_LINE = Box(" ══ \n    \n    \n    \n    \n    \n    \n    \n")
 
-LINE = Box(" ── \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n")
+LINE = Box(" ── \n    \n    \n    \n    \n    \n    \n    \n")
 
-DOTTED = Box(" ·· \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n")
+DOTTED = Box(" ·· \n    \n    \n    \n    \n    \n    \n    \n")
 
-NOBORDER = Box("    \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n")
+NOBORDER = Box("    \n    \n    \n    \n    \n    \n    \n    \n")
diff --git a/src/inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py b/src/inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py
@@ -38,9 +38,9 @@ def __init__(self, env: dm_env.Environment):
             for i, obs_spec in enumerate(env_obs_spec.values()):
                 self.observation_spec[i + 1] = convert(obs_spec)
 
-        assert isinstance(
-            env.action_spec(), specs.Array
-        ), "Only a single action type is supported."
+        assert isinstance(env.action_spec(), specs.Array), (
+            "Only a single action type is supported."
+        )
         self.action_spec = {1: convert(env.action_spec())}
 
         self.observation_manager = spec_manager.SpecManager(self.observation_spec)
@@ -234,12 +234,12 @@ def _handle_step_request(
             observations.
         """
         with self._lock:
-            assert (
-                cur_world in self._envs
-            ), "Current world does not have an assosiated environment"
-            assert (
-                cur_world in self._joined_worlds
-            ), "Please join world before calling step."
+            assert cur_world in self._envs, (
+                "Current world does not have an assosiated environment"
+            )
+            assert cur_world in self._joined_worlds, (
+                "Please join world before calling step."
+            )
             env = self._envs[cur_world]
             spec = self._specs[cur_world]
 

diff --git a/src/inspect_ai/util/_sandbox/docker/compose.py b/src/inspect_ai/util/_sandbox/docker/compose.py
@@ -33,9 +33,7 @@ async def compose_up(project: ComposeProject) -> None:
         timeout=300,
     )
     if not result.success:
-        msg = (
-            f"Failed to start docker services for {project.config}: " f"{result.stderr}"
-        )
+        msg = f"Failed to start docker services for {project.config}: {result.stderr}"
         raise RuntimeError(msg)
 
 

diff --git a/src/inspect_ai/util/_sandbox/self_check.py b/src/inspect_ai/util/_sandbox/self_check.py
@@ -75,9 +75,9 @@ async def test_read_and_write_file_text(sandbox_env: SandboxEnvironment) -> None
     written_file_string = await sandbox_env.read_file(
         "test_read_and_write_file_text.file", text=True
     )
-    assert (
-        "great #content\nincluding newlines" == written_file_string
-    ), f"unexpected content: [{written_file_string}]"
+    assert "great #content\nincluding newlines" == written_file_string, (
+        f"unexpected content: [{written_file_string}]"
+    )
     await _cleanup_file(sandbox_env, "test_read_and_write_file_text.file")
 
 
@@ -219,9 +219,9 @@ async def test_exec_output(sandbox_env: SandboxEnvironment) -> None:
     exec_result = await sandbox_env.exec(["sh", "-c", "echo foo; echo bar"])
     expected = "foo\nbar\n"
     # in the assertion message, we show the actual bytes to help debug newline issues
-    assert (
-        exec_result.stdout == expected
-    ), f"Unexpected output:expected {expected.encode('UTF-8')!r}; got {exec_result.stdout.encode('UTF-8')!r}"
+    assert exec_result.stdout == expected, (
+        f"Unexpected output:expected {expected.encode('UTF-8')!r}; got {exec_result.stdout.encode('UTF-8')!r}"
+    )
 
 
 async def test_exec_timeout(sandbox_env: SandboxEnvironment) -> None:
@@ -248,13 +248,13 @@ async def test_exec_as_user(sandbox_env: SandboxEnvironment) -> None:
 
         # Test exec as different users
         root_result = await sandbox_env.exec(["whoami"], user="root")
-        assert (
-            root_result.stdout.strip() == "root"
-        ), f"Expected 'root', got '{root_result.stdout.strip()}'"
+        assert root_result.stdout.strip() == "root", (
+            f"Expected 'root', got '{root_result.stdout.strip()}'"
+        )
         myuser_result = await sandbox_env.exec(["whoami"], user=username)
-        assert (
-            myuser_result.stdout.strip() == username
-        ), f"Expected '{username}', got '{myuser_result.stdout.strip()}'"
+        assert myuser_result.stdout.strip() == username, (
+            f"Expected '{username}', got '{myuser_result.stdout.strip()}'"
+        )
     finally:
         # Clean up
         await sandbox_env.exec(["userdel", "-r", username], user="root")
@@ -266,9 +266,9 @@ async def test_exec_as_nonexistent_user(sandbox_env: SandboxEnvironment) -> None
     expected_error = (
         "unable to find user nonexistent: no matching entries in passwd file"
     )
-    assert (
-        expected_error in result.stdout
-    ), f"Error string '{expected_error}' not found in error output: '{result.stdout}'"
+    assert expected_error in result.stdout, (
+        f"Error string '{expected_error}' not found in error output: '{result.stdout}'"
+    )
 
 
 async def test_cwd_unspecified(sandbox_env: SandboxEnvironment) -> None:
@@ -291,9 +291,9 @@ async def test_cwd_relative(sandbox_env: SandboxEnvironment) -> None:
     file_path = cwd_subdirectory + "/" + file_name
     await sandbox_env.write_file(file_path, "ls me plz")
     current_dir_contents = (await sandbox_env.exec(["ls"], cwd=cwd_subdirectory)).stdout
-    assert (
-        file_name in current_dir_contents
-    ), f"{file_name} not found in {current_dir_contents}"
+    assert file_name in current_dir_contents, (
+        f"{file_name} not found in {current_dir_contents}"
+    )
     await _cleanup_file(sandbox_env, file_path)
 
 

diff --git a/src/inspect_ai/util/_subprocess.py b/src/inspect_ai/util/_subprocess.py
@@ -101,9 +101,9 @@ async def subprocess(
     input = input.encode() if isinstance(input, str) else input
 
     # function to run command (we may or may not run it w/ concurrency)
-    async def run_command() -> (
-        AsyncGenerator[Union[Process, ExecResult[str], ExecResult[bytes]], None]
-    ):
+    async def run_command() -> AsyncGenerator[
+        Union[Process, ExecResult[str], ExecResult[bytes]], None
+    ]:
         if isinstance(args, str):
             proc = await asyncio.create_subprocess_shell(
                 args,

diff --git a/tests/log/test_log_formats.py b/tests/log/test_log_formats.py
@@ -114,9 +114,9 @@ def test_log_format_eval_zip_structure(original_log, temp_dir):
     write_eval_log(eval_log, new_eval_log_path, format="eval")
 
     # Compare the two EVAL files
-    assert compare_zip_contents(
-        eval_log_path, new_eval_log_path
-    ), "EVAL zip file contents changed after round trip"
+    assert compare_zip_contents(eval_log_path, new_eval_log_path), (
+        "EVAL zip file contents changed after round trip"
+    )
 
 
 def test_log_format_eval_zip_json_integrity(original_log, temp_dir):
@@ -152,9 +152,9 @@ def test_log_format_eval_zip_roundtrip(original_log, temp_dir):
     new_json_log.location = None
 
     # Compare the original and new JSON logs
-    assert (
-        original_log == new_json_log
-    ), "JSON content changed after roundtrip through EVAL format"
+    assert original_log == new_json_log, (
+        "JSON content changed after roundtrip through EVAL format"
+    )
 
 
 def compare_zip_contents(zip_file1: Path, zip_file2: Path) -> bool:

diff --git a/tests/util/test_subprocess.py b/tests/util/test_subprocess.py
@@ -69,7 +69,7 @@ def process_found(pattern: str) -> bool:
         )
 
     timeout_length = random() * 60
-    subprocess_cmds = ["sleep", f"{2+timeout_length}"]
+    subprocess_cmds = ["sleep", f"{2 + timeout_length}"]
 
     if process_found(" ".join(subprocess_cmds)):
         raise Exception(