Add support for stop sequences to HF models #1188

Merged · 5 commits · Jan 25, 2025
CHANGELOG.md (1 addition, 0 deletions)

@@ -4,6 +4,7 @@

- [Agent Bridge](https://inspect.ai-safety-institute.org.uk/agent-bridge.html) for integrating external agent frameworks with Inspect.
- Add `@wraps` to functions wrapped by Inspect decorators to preserve type information.
- Hugging Face: Add support for stop sequences for HF models.
- Docker: More robust parsing of version strings (handle development versions).

## v0.3.59 (24 January 2025)
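For orientation, here is a minimal sketch of how a caller opts in to the new option (the model name and stop sequence simply mirror the test added below, and this assumes get_model and GenerateConfig are importable from inspect_ai.model, as the tests do):

from inspect_ai.model import GenerateConfig, get_model

# completions will now be truncated once "w3" has been generated
model = get_model(
    "hf/EleutherAI/pythia-70m",
    config=GenerateConfig(stop_seqs=["w3"]),
)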
src/inspect_ai/model/_providers/hf.py (6 additions, 0 deletions)

@@ -150,6 +150,12 @@ async def generate(
kwargs["output_logits"] = config.logprobs
if "return_dict_in_generate" in kwargs:
assert kwargs["return_dict_in_generate"]
if config.stop_seqs is not None:
from transformers.generation import StopStringCriteria # type: ignore

stopping_criteria = [StopStringCriteria(self.tokenizer, config.stop_seqs)]
kwargs["stopping_criteria"] = stopping_criteria

kwargs["return_dict_in_generate"] = True
generator = functools.partial(self.model.generate, **kwargs)

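Under the hood this delegates to transformers' StopStringCriteria. A standalone sketch of the same mechanism, assuming a transformers release recent enough to ship StopStringCriteria, and using gpt2 purely for illustration:

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import StoppingCriteriaList, StopStringCriteria

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("https://", return_tensors="pt")
outputs = model.generate(
    **inputs,
    max_new_tokens=20,
    # stop decoding once any stop string appears in the generated text
    stopping_criteria=StoppingCriteriaList(
        [StopStringCriteria(tokenizer, ["w3"])]
    ),
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Note that the provider change passes a plain list rather than a StoppingCriteriaList; generate merges user-supplied criteria into its own list, so either form works.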
tests/model/providers/test_hf.py (33 additions, 0 deletions)

@@ -27,6 +27,28 @@ def model():
    )


@pytest.fixture
def model_with_stop_seqs():
    DEFAULT_CHAT_TEMPLATE = (
        "{% for message in messages %}{{ message.content }}{% endfor %}"
    )
    model = get_model(
        "hf/EleutherAI/pythia-70m",
        config=GenerateConfig(
            max_tokens=5,
            seed=42,
            temperature=0.001,
            stop_seqs=["w3"],
        ),
        # this allows us to run base models with the chat message scaffolding:
        chat_template=DEFAULT_CHAT_TEMPLATE,
        tokenizer_call_args={"truncation": True, "max_length": 10},
    )
    # chat template is not propagated by default from get_model to the model's tokenizer
    model.api.tokenizer.chat_template = DEFAULT_CHAT_TEMPLATE
    return model


@pytest.mark.asyncio
@skip_if_github_action
@skip_if_no_transformers
@@ -38,6 +60,17 @@ async def test_hf_api(model) -> None:
    assert len(response.completion) >= 1


@pytest.mark.asyncio
@skip_if_github_action
@skip_if_no_transformers
@skip_if_no_accelerate
async def test_hf_api_with_stop_seqs(model_with_stop_seqs) -> None:
    # pythia-70m greedily decodes "https://" to "https://www.w3.org", so the
    # stop sequence "w3" should truncate the completion to "www.w3": the stop
    # sequence itself is kept, everything after it is cut off
    message = ChatMessageUser(content="https://")
    response = await model_with_stop_seqs.generate(input=[message])
    assert response.completion == "www.w3"


@pytest.mark.asyncio
@skip_if_github_action
@skip_if_no_transformers