[Build] Separate notebooks testing (#825)
The notebook tests (currently run as part of the CI build) are particularly unreliable and also very slow, most likely because of the large number of model calls they make. For now, split them into a CI build of their own, so that failures in the 'real' CI build are more obvious.
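For context, the split is built on pytest's marker mechanism: a module-level `pytestmark` tags every notebook test, the regular CI builds deselect that mark, and the new workflow selects only it. A minimal sketch of the pattern (illustration only, not code from this commit):

```python
# Sketch: how a module opts into the `notebooks` marker registered in pyproject.toml.
import pytest

# Applies the mark to every test in this module.
pytestmark = pytest.mark.notebooks


def test_runs_some_notebook():
    ...  # placeholder; the real tests execute notebooks via papermill


# Regular CI:   pytest -m "not (server or needs_credentials or notebooks)" ./tests/
# New workflow: pytest -m "notebooks" ./tests/
```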
riedgar-ms authored May 15, 2024
1 parent a75896a commit 2f1bfb9
Showing 6 changed files with 98 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/action_gpu_unit_tests.yml
@@ -64,7 +64,7 @@ jobs:
run: |
pytest -vv --cov=guidance --cov-report=xml --cov-report=term-missing \
--selected_model ${{ matrix.model }} \
-m "not (server or needs_credentials)" \
-m "not (server or needs_credentials or notebooks)" \
./tests/
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v4
2 changes: 1 addition & 1 deletion .github/workflows/action_plain_unit_tests.yml
@@ -44,7 +44,7 @@ jobs:
run: |
pytest --cov=guidance --cov-report=xml --cov-report=term-missing \
--selected_model ${{ inputs.model }} \
-m "not (needs_credentials or use_gpu or server)" \
-m "not (needs_credentials or use_gpu or server or notebooks)" \
./tests/
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v4
2 changes: 1 addition & 1 deletion .github/workflows/ci_tests.yml
@@ -79,7 +79,7 @@ jobs:
AZURE_AI_STUDIO_LLAMA3_CHAT_KEY: ${{ secrets.AZURE_AI_STUDIO_LLAMA3_CHAT_KEY }}
run: |
pytest --cov=guidance --cov-report=xml --cov-report=term-missing \
- -m needs_credentials \
+ -m "needs_credentials and (not notebooks)" \
./tests/
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v4
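The `-m` expression combines registered marks with boolean operators; a small illustration (not from this commit) of how `needs_credentials and (not notebooks)` selects and deselects tests:

```python
# Illustration only: marker combinations as evaluated by
#   pytest -m "needs_credentials and (not notebooks)"
import pytest


@pytest.mark.needs_credentials
def test_azure_endpoint():
    ...  # selected: has needs_credentials and lacks notebooks


@pytest.mark.needs_credentials
@pytest.mark.notebooks
def test_credentialed_notebook():
    ...  # deselected here; picked up by the notebook workflow instead
```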
90 changes: 90 additions & 0 deletions .github/workflows/notebook_tests.yml
@@ -0,0 +1,90 @@
# Notebook Tests

# These should only be run on main, because they access secrets
# Not part of the regular CI run, since notebook tests seem
# particularly flaky

name: Notebook Tests


on:
workflow_dispatch:
push:
branches: [main]
schedule:
# * is a special character in YAML so we quote this string
# Run at 0830 UTC every day
- cron: '30 08 * * *'

jobs:
build:

runs-on: gpu-runner
strategy:
fail-fast: false # Don't cancel all on first failure
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v4
- name: Install Rust
shell: bash
run: |
curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain 1.75.0
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Show GPUs
run: |
nvidia-smi
- name: Update Ubuntu
run: |
sudo apt-get update
sudo apt-get -y upgrade
- name: Ensure NVIDIA SDK available
run: |
sudo apt-get -y install cuda-toolkit
echo "/usr/local/cuda-12.4/bin" >> $GITHUB_PATH
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest
pip install -e .[all,test]
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: GPU pip installs
run: |
pip install accelerate
CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install "llama-cpp-python<0.2.58"
- name: Check GPU available
run: |
python -c "import torch; assert torch.cuda.is_available()"
- name: Test with pytest
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
# Configure endpoints for Azure OpenAI
AZUREAI_CHAT_ENDPOINT: ${{ secrets.AZUREAI_CHAT_ENDPOINT }}
AZUREAI_CHAT_KEY: ${{ secrets.AZUREAI_CHAT_KEY }}
AZUREAI_CHAT_MODEL: ${{ secrets.AZUREAI_CHAT_MODEL }}
AZUREAI_COMPLETION_ENDPOINT: ${{ secrets.AZUREAI_COMPLETION_ENDPOINT }}
AZUREAI_COMPLETION_KEY: ${{ secrets.AZUREAI_COMPLETION_KEY }}
AZUREAI_COMPLETION_MODEL: ${{ secrets.AZUREAI_COMPLETION_MODEL }}
# Configure endpoints for Azure AI Studio
AZURE_AI_STUDIO_PHI3_ENDPOINT: ${{ vars.AZURE_AI_STUDIO_PHI3_ENDPOINT }}
AZURE_AI_STUDIO_PHI3_DEPLOYMENT: ${{ vars.AZURE_AI_STUDIO_PHI3_DEPLOYMENT }}
AZURE_AI_STUDIO_PHI3_KEY: ${{ secrets.AZURE_AI_STUDIO_PHI3_KEY }}
AZURE_AI_STUDIO_MISTRAL_CHAT_ENDPOINT: ${{ vars.AZURE_AI_STUDIO_MISTRAL_CHAT_ENDPOINT }}
AZURE_AI_STUDIO_MISTRAL_CHAT_DEPLOYMENT: ${{ vars.AZURE_AI_STUDIO_MISTRAL_CHAT_DEPLOYMENT }}
AZURE_AI_STUDIO_MISTRAL_CHAT_KEY: ${{ secrets.AZURE_AI_STUDIO_MISTRAL_CHAT_KEY }}
AZURE_AI_STUDIO_LLAMA3_CHAT_ENDPOINT: ${{ vars.AZURE_AI_STUDIO_LLAMA3_CHAT_ENDPOINT }}
AZURE_AI_STUDIO_LLAMA3_CHAT_DEPLOYMENT: ${{ vars.AZURE_AI_STUDIO_LLAMA3_CHAT_DEPLOYMENT }}
AZURE_AI_STUDIO_LLAMA3_CHAT_KEY: ${{ secrets.AZURE_AI_STUDIO_LLAMA3_CHAT_KEY }}
run: |
pytest --cov=guidance --cov-report=xml --cov-report=term-missing \
-m "notebooks" \
./tests/
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
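Because the notebook suite depends on the secrets injected above, a local run needs the same variables set. A hedged, illustrative pre-flight check (not part of the commit; the variable names are taken from the workflow's `env:` block, the script itself is hypothetical):

```python
# Sketch: verify the credentials the workflow injects before running the suite locally.
import os
import sys

# Subset of the variables configured in the workflow's env: block above.
REQUIRED = [
    "HF_TOKEN",
    "AZUREAI_CHAT_ENDPOINT",
    "AZUREAI_CHAT_KEY",
    "AZUREAI_CHAT_MODEL",
]

missing = [name for name in REQUIRED if not os.environ.get(name)]
if missing:
    sys.exit(f"Missing environment variables for notebook tests: {', '.join(missing)}")

print("Environment looks complete; run: pytest -m notebooks ./tests/")
```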
1 change: 1 addition & 0 deletions pyproject.toml
@@ -12,6 +12,7 @@ markers = [
"server: Potentially unreliable tests of the server functionality",
"needs_credentials: Test which needs access to credentials to work",
"use_gpu: Test which uses always uses GPU (model not injected via fixture)",
"notebooks: Test of the notebooks",
]

[tool.black]
4 changes: 4 additions & 0 deletions tests/test_notebooks.py
@@ -6,6 +6,10 @@
import papermill as pm
import pytest

+ # Everything in here is a notebook...
+ # Mark is configured in pyproject.toml
+ pytestmark = pytest.mark.notebooks
+
BASE_NB_PATH = pathlib.Path("./notebooks").absolute()


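For reference, the tests in this file drive notebooks through papermill; a minimal sketch of the pattern, using a hypothetical notebook name (the real tests point at actual notebooks under BASE_NB_PATH):

```python
# Sketch only: executing a notebook end to end with papermill inside a pytest test.
import pathlib

import papermill as pm
import pytest

pytestmark = pytest.mark.notebooks

BASE_NB_PATH = pathlib.Path("./notebooks").absolute()


def test_example_notebook(tmp_path):
    # "example.ipynb" is a hypothetical name for illustration.
    nb_path = BASE_NB_PATH / "example.ipynb"
    # Raises if any cell in the notebook errors, which fails the test.
    pm.execute_notebook(str(nb_path), str(tmp_path / "out.ipynb"))
```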
