Automated testing for codeblocks in docs (#992)
* Automated testing for codeblocks

* Update github actions
jverre authored Jan 7, 2025
1 parent bc63af2 commit ec38f6c
Showing 37 changed files with 552 additions and 49 deletions.
57 changes: 57 additions & 0 deletions .github/workflows/documentation_codeblock_tests.yml
@@ -0,0 +1,57 @@
name: Documentation - Test codeblocks
on:
  workflow_dispatch:
    inputs:
      install_opik:
        description: 'Enable opik installation from source files'
        required: false
        default: 'false'
        type: choice
        options:
          - 'false'
          - 'true'
  schedule:
    - cron: '0 0 * * *' # Run once a day at midnight UTC

jobs:
  collect_test_paths:
    runs-on: ubuntu-latest
    outputs:
      test_paths: ${{ steps.paths.outputs.paths }}
    steps:
      - uses: actions/checkout@v3
      - id: paths
        working-directory: apps/opik-documentation/documentation
        run: |
          # Collect both directories and markdown files in root,
          # and emit them as a JSON array on the step's "paths" output
          echo "paths=$(
            (
              ls -d docs/*/ 2>/dev/null;
              find docs -maxdepth 1 -type f -name "*.md"
            ) | jq --raw-input --slurp --compact-output 'split("\n")[:-1]'
          )" >> ${GITHUB_OUTPUT}

  test:
    needs: collect_test_paths
    runs-on: ubuntu-latest
    strategy:
      matrix:
        path: ${{ fromJson(needs.collect_test_paths.outputs.test_paths) }}
      fail-fast: false
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pytest
          if [ "${{ github.event.inputs.install_opik }}" = "true" ]; then
            pip install -e .
          fi

      - name: Run tests
        working-directory: apps/opik-documentation/documentation
        run: |
          pytest ${{ matrix.path }} -v
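
For local debugging, the path-collection step can be approximated in plain Python — a rough sketch only, assuming it runs from `apps/opik-documentation/documentation`:

```python
# Hypothetical local stand-in for the collect_test_paths job.
import glob
import json
import os

# Top-level doc directories plus markdown files directly under docs/
paths = glob.glob("docs/*/") + [
    p for p in glob.glob("docs/*.md") if os.path.isfile(p)
]

# The workflow feeds this JSON array into the test job's matrix
print(json.dumps(paths))
```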
2 changes: 2 additions & 0 deletions apps/opik-documentation/documentation/.gitignore
@@ -4,6 +4,8 @@
# Production
/build

+/data

# Generated files
.docusaurus
.cache-loader
4 changes: 4 additions & 0 deletions apps/opik-documentation/documentation/conftest.py
@@ -0,0 +1,4 @@
from pytest_codeblocks.pytest_integration import pytest_collect_file

# Export the necessary components
__all__ = ["pytest_collect_file"]
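
To sanity-check the hook end to end, something like the following should work — a sketch, assuming `pytest` and the `pytest_codeblocks` package are installed and this `conftest.py` is on the collection path:

```python
# Hypothetical smoke test, run from apps/opik-documentation/documentation.
import pathlib
import subprocess

fence = "`" * 3  # avoids nesting a literal fence inside this example
sample = pathlib.Path("docs/_codeblock_smoke_test.md")
sample.write_text(
    "# Smoke test\n\n"
    f"{fence}python\n"
    "assert 1 + 1 == 2\n"
    f"{fence}\n"
)

# pytest_collect_file should turn the fenced block into a test item
subprocess.run(["pytest", str(sample), "-v"], check=True)
sample.unlink()
```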
1 change: 1 addition & 0 deletions apps/opik-documentation/documentation/docs/changelog.md
@@ -1,6 +1,7 @@
---
sidebar_label: Changelog
description: Weekly changelog for Opik
+pytest_codeblocks_skip: true
---

# Weekly Changelog
@@ -1,6 +1,7 @@
---
sidebar_label: Evaluate your LLM Application
description: Step by step guide on how to evaluate your LLM application
+pytest_codeblocks_execute_previous: true
---

# Evaluate your LLM Application
@@ -130,7 +131,7 @@ def your_llm_application(input: str) -> str:
# Define the evaluation task
def evaluation_task(x):
    return {
-        "output": your_llm_application(x['user_question'])
+        "output": your_llm_application(x['input'])
    }

# Create a simple dataset
@@ -145,7 +146,6 @@ dataset.insert([
hallucination_metric = Hallucination()

evaluation = evaluate(
-    experiment_name="My experiment",
    dataset=dataset,
    task=evaluation_task,
    scoring_metrics=[hallucination_metric],
@@ -193,7 +193,6 @@ prompt = opik.Prompt(

# Run the evaluation
evaluation = evaluate(
-    experiment_name="My experiment",
    dataset=dataset,
    task=evaluation_task,
    scoring_metrics=[hallucination_metric],
@@ -78,7 +78,7 @@ The plugin is open source and available at [comet-ml/opik-kong-plugin](https://github.com/comet-ml/opik-kong-plugin)

Once the plugin is installed, you can enable it by running:

-```bash
+```bash pytest_codeblocks_skip="true"
curl -is -X POST http://localhost:8001/services/{serviceName|Id}/plugins \
    --header "accept: application/json" \
    --header "Content-Type: application/json" \
@@ -51,9 +51,7 @@ import opik
opik_client = opik.Opik()

traces = opik_client.search_traces(
-    project_name="Default Project",
-    start_time="2024-01-01",
-    end_time="2025-01-01",
+    project_name="Default Project"
)
```

@@ -67,12 +65,11 @@ The `search_traces` method allows you to fetch traces based on any of the trace attributes.
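
For example, a filtered fetch might look like the following sketch — the `filter_string` parameter and its query syntax are assumptions here, not confirmed by this page:

```python
import opik

opik_client = opik.Opik()

# Hypothetical filtered fetch; filter_string is assumed to accept
# simple attribute comparisons against trace fields
traces = opik_client.search_traces(
    project_name="Default Project",
    filter_string='input contains "question"',
)
```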

Once you have fetched the traces you want to annotate, you can update the feedback scores using the [`Opik.log_traces_feedback_scores`](https://www.comet.com/docs/opik/python-sdk-reference/Opik.html#opik.Opik.log_traces_feedback_scores) method.

-```python
+```python pytest_codeblocks_skip="true"
for trace in traces:
    opik_client.log_traces_feedback_scores(
        project_name="Default Project",
-        trace_ids=[i.id],
-        feedback_scores=[{"name": "user_feedback", "value": 1.0, "reason": "The response was helpful and accurate."}],
+        feedback_scores=[{"id": trace.id, "name": "user_feedback", "value": 1.0, "reason": "The response was helpful and accurate."}],
    )
```
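
Putting the two snippets together, a minimal end-to-end pass could look like this sketch (it assumes a "Default Project" with at least one logged trace):

```python
import opik

opik_client = opik.Opik()

# Fetch traces, then attach the same feedback score to each of them
traces = opik_client.search_traces(project_name="Default Project")
opik_client.log_traces_feedback_scores(
    project_name="Default Project",
    feedback_scores=[
        {
            "id": trace.id,
            "name": "user_feedback",
            "value": 1.0,
            "reason": "The response was helpful and accurate.",
        }
        for trace in traces
    ],
)
```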

8 changes: 4 additions & 4 deletions apps/opik-documentation/documentation/docs/quickstart.mdx
@@ -76,7 +76,7 @@ All OpenAI calls made using the `openai_client` will now be logged to Opik.
</TabItem>
<TabItem value="LiteLLM" title="LiteLLM">

-```python
+```python pytest_codeblocks_skip="true"
from litellm.integrations.opik.opik import OpikLogger
import litellm

@@ -93,7 +93,7 @@ All LiteLLM calls made using the `litellm` client will now be logged to Opik.

If you are using an LLM provider that Opik does not have an integration for, you can still log the LLM calls by using the `@track` decorator:

-```python
+```python pytest_codeblocks_skip="true"
from opik import track
import anthropic

@@ -182,7 +182,7 @@ While this code sample assumes that you are using OpenAI, the same principle applies
If you are using LangChain to build your chains, you can log them with the `OpikTracer`, a LangChain callback that logs every step of the chain to Opik:

-```python
+```python pytest_codeblocks_skip="true"
from langchain_openai import OpenAI
from langchain.prompts import PromptTemplate
from opik.integrations.langchain import OpikTracer
@@ -211,7 +211,7 @@ llm_chain.invoke({"input": "Hello, how are you?"}, callbacks=[opik_tracer])

If you are using LlamaIndex, you can set `opik` as a global callback to log all LLM calls:

-```python
+```python pytest_codeblocks_skip="true"
from llama_index.core import global_handler, set_global_handler

set_global_handler("opik")
@@ -1,6 +1,7 @@
---
sidebar_label: Production (Kubernetes)
description: Describes how to run Opik on a Kubernetes cluster
+test_code_snippets: false
---

# Production ready Kubernetes deployment
@@ -1,6 +1,7 @@
---
sidebar_label: Local (Docker Compose)
description: Describes how to run Opik locally using Docker Compose
+test_code_snippets: false
---

# Local Deployments using Docker Compose
@@ -1,6 +1,7 @@
---
sidebar_label: Overview
description: High-level overview on how to self-host Opik
+test_code_snippets: false
---

# Self-hosting Opik
@@ -16,7 +17,7 @@ If you choose to self-host Opik, you can choose between two deployment options:

If you would like to try out Opik locally, we recommend using our Local installation based on `docker compose`. Assuming you have `git` and `docker` installed, you can get started in a couple of minutes:

-```bash
+```bash pytest_codeblocks_skip="true"
# Clone the Opik repository
git clone https://github.com/comet-ml/opik.git

@@ -28,14 +29,14 @@ docker compose up --detach

Opik will now be available at <a href="http://localhost:5173" target="_blank">http://localhost:5173</a> and all traces logged from your local machine will be logged to this local Opik instance. In order for traces and other data to be logged to your Opik instance, you need to make sure that the Opik Python SDK is configured to point to the Opik server you just started. You can do this by running the following command:

-```bash
+```bash pytest_codeblocks_skip="true"
# Configure the Python SDK to point to the local Opik platform
export OPIK_BASE_URL=http://localhost:5173/api
```

or in Python:

-```python
+```python pytest_codeblocks_skip="true"
import os

os.environ["OPIK_BASE_URL"] = "http://localhost:5173/api"
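
# A hypothetical alternative is the SDK's configure helper, assuming
# opik.configure(use_local=True) points the SDK at a local deployment:
import opik

opik.configure(use_local=True)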
@@ -111,7 +111,7 @@ from opik.evaluation.metrics import Contains
metric = Contains()
score = metric.score(
    output="The quick brown fox jumps over the lazy dog.",
-    expected_output="The quick brown fox jumps over the lazy dog."
+    reference="The quick brown fox jumps over the lazy dog."
)
```
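
The returned object is assumed here to be a `ScoreResult`-style value exposing `.value`; a short usage sketch under that assumption:

```python
from opik.evaluation.metrics import Contains

metric = Contains()
result = metric.score(
    output="The quick brown fox jumps over the lazy dog.",
    reference="quick brown fox",
)
# Assumed ScoreResult-style object: 1.0 when the reference substring
# appears in the output, 0.0 otherwise
print(result.value)
```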

@@ -1,6 +1,7 @@
---
sidebar_label: Cost Tracking
description: Describes how to track and monitor costs for your LLM applications using Opik
+test_code_snippets: false
---

# Cost Tracking
@@ -42,9 +43,9 @@ You can retrieve the estimated cost programmatically for both spans and traces.
```python
import opik

-client = opik.Client()
+client = opik.Opik()

-span = client.get_span_content(SPAN_ID)
+span = client.get_span_content("<SPAN_ID>")
# Returns estimated cost in USD, or None for unsupported models
print(span.total_estimated_cost)
```
@@ -54,9 +55,9 @@ print(span.total_estimated_cost)
```python
import opik

-client = opik.Client()
+client = opik.Opik()

-trace = client.get_trace_content(TRACE_ID)
+trace = client.get_trace_content("<TRACE_ID>")
# Returns estimated cost in USD, or None for unsupported models
print(trace.total_estimated_cost)
```
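
To roll costs up across a whole project, the two calls above can be combined with `search_traces` — a hedged sketch, assuming each trace's content carries `total_estimated_cost` as shown:

```python
import opik

client = opik.Opik()

# Sum estimated costs over every trace in a project; traces from
# unsupported models report None and are skipped
total = 0.0
for trace in client.search_traces(project_name="Default Project"):
    content = client.get_trace_content(trace.id)
    total += content.total_estimated_cost or 0.0

print(f"Estimated project cost: ${total:.4f} USD")
```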
@@ -36,21 +36,21 @@ opik configure

In order to configure Anthropic, you will need to have your Anthropic API Key set; see this [section on how to pass your Anthropic API Key](https://github.com/anthropics/anthropic-sdk-python?tab=readme-ov-file#usage).

-Once you have it, you can set create your Anthropic client:
+Once you have it, you can set it as an environment variable:

-```python
-import anthropic
-
-anthropic_client = anthropic.Anthropic()
+```bash pytest_codeblocks_skip="true"
+export ANTHROPIC_API_KEY="YOUR_API_KEY"
```
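
Equivalently, the key can be set from Python before the client is created — a hypothetical inline alternative mirroring the pattern used in the Gemini docs:

```python
import os

os.environ["ANTHROPIC_API_KEY"] = "YOUR_API_KEY"
```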

## Logging LLM calls

In order to log the LLM calls to Opik, you will need to wrap the Anthropic client with `track_anthropic`. When making calls with that wrapped client, all calls will be logged to Opik:

```python
+import anthropic
from opik.integrations.anthropic import track_anthropic

+anthropic_client = anthropic.Anthropic()
anthropic_client = track_anthropic(anthropic_client, project_name="anthropic-integration-demo")

PROMPT = "Why is it important to use an LLM monitoring tool like Comet Opik that allows you to log traces and spans when working with Anthropic LLM models?"
@@ -1,6 +1,7 @@
---
sidebar_label: Bedrock
description: Describes how to track Bedrock LLM calls using Opik
+test_code_snippets: false
---

# AWS Bedrock
@@ -1,6 +1,7 @@
---
sidebar_label: Dify
description: Describes how to use Opik with Dify
+test_code_snippets: false
---

import Tabs from "@theme/Tabs";
@@ -44,7 +44,7 @@ In order to configure Gemini, you will need to have:

Once you have these, you can set them as environment variables:

-```python
+```python pytest_codeblocks_skip="true"
import os

os.environ["GEMINI_API_KEY"] = "" # Your Google AI Studio Gemini API Key
@@ -76,14 +76,17 @@ response = litellm.completion(
If you are using LiteLLM within a function tracked with the [`@track`](/tracing/log_traces#using-function-decorators) decorator, you will need to pass the `current_span_data` as metadata to the `litellm.completion` call:

```python
+from opik import track, opik_context
+import litellm

@track
def generate_story(prompt):
    response = litellm.completion(
        model="gemini/gemini-pro",
        messages=[{"role": "user", "content": prompt}],
        metadata={
            "opik": {
-                "current_span_data": get_current_span_data(),
+                "current_span_data": opik_context.get_current_span_data(),
            },
        },
    )
@@ -98,7 +101,7 @@ def generate_topic():
        messages=[{"role": "user", "content": prompt}],
        metadata={
            "opik": {
-                "current_span_data": get_current_span_data(),
+                "current_span_data": opik_context.get_current_span_data(),
            },
        },
    )
@@ -76,14 +76,17 @@ response = litellm.completion(
If you are using LiteLLM within a function tracked with the [`@track`](/tracing/log_traces#using-function-decorators) decorator, you will need to pass the `current_span_data` as metadata to the `litellm.completion` call:

```python
+from opik import track, opik_context
+import litellm

@track
def generate_story(prompt):
    response = litellm.completion(
        model="groq/llama3-8b-8192",
        messages=[{"role": "user", "content": prompt}],
        metadata={
            "opik": {
-                "current_span_data": get_current_span_data(),
+                "current_span_data": opik_context.get_current_span_data(),
            },
        },
    )
@@ -94,11 +97,11 @@ def generate_story(prompt):
def generate_topic():
    prompt = "Generate a topic for a story about Opik."
    response = litellm.completion(
-        model="Groq/Groq-pro",
+        model="groq/llama-3.3-70b-versatile",
        messages=[{"role": "user", "content": prompt}],
        metadata={
            "opik": {
-                "current_span_data": get_current_span_data(),
+                "current_span_data": opik_context.get_current_span_data(),
            },
        },
    )