Add example to demonstrate using a custom sys.path #41

Open · wants to merge 1 commit into main
4 changes: 4 additions & 0 deletions knowledge_base/job_with_custom_sys_path/.gitignore
@@ -0,0 +1,4 @@
/.databricks
/.venv
/.vscode
__pycache__
43 changes: 43 additions & 0 deletions knowledge_base/job_with_custom_sys_path/README.md
@@ -0,0 +1,43 @@
# Job with custom `sys.path`

This example demonstrates how to:
1. Define a job that takes parameters whose values derive from the bundle configuration.
2. Use the path parameter to augment Python's `sys.path` to import a module from the bundle.
3. Access job parameters from the imported module.
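
These steps can be sketched end to end on a local machine. The snippet below simulates the `bundle_file_path` job parameter with a temporary directory; the directory and `my_module` are stand-ins invented for this illustration, not part of the example itself:

```python
import importlib
import sys
import tempfile
from pathlib import Path

# Stand-in for the "bundle_file_path" job parameter: a directory that
# contains an importable module, playing the role of my_custom_library.
bundle_file_path = tempfile.mkdtemp()
Path(bundle_file_path, "my_module.py").write_text("VALUE = 42\n")

# Augment sys.path with the parameter value, then import from it.
sys.path.append(bundle_file_path)
my_module = importlib.import_module("my_module")
print(my_module.VALUE)  # → 42
```

On a real job run, `bundle_file_path` would instead come from `dbutils.widgets.get(...)`, as shown in the notebook below.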

## Prerequisites

* Databricks CLI v0.230.0 or above

## Usage

This example includes a unit test for the function defined under `my_custom_library` that you can execute on your machine.

```bash
# Set up a virtual environment
uv venv
source .venv/bin/activate
uv pip install -r ./requirements.txt

# Run the unit test
python -m pytest
```

To deploy the bundle to Databricks, follow these steps:

* Update the `host` field under `workspace` in `databricks.yml` to the Databricks workspace you wish to deploy to.
* Run `databricks bundle deploy` to deploy the job.
* Run `databricks bundle run print_bundle_configuration` to run the job.

Example output:

```
% databricks bundle run print_bundle_configuration
Run URL: https://...

2024-10-15 11:48:43 "[dev pieter_noordhuis] Example to demonstrate job parameterization" TERMINATED SUCCESS
```

Navigate to the run URL to observe the output of the loaded configuration file.

You can execute the same steps for the `prod` target.
1 change: 1 addition & 0 deletions knowledge_base/job_with_custom_sys_path/config/dev.json
@@ -0,0 +1 @@
[ "this is my development config" ]
1 change: 1 addition & 0 deletions knowledge_base/job_with_custom_sys_path/config/prod.json
@@ -0,0 +1 @@
[ "this is my production config" ]
1 change: 1 addition & 0 deletions knowledge_base/job_with_custom_sys_path/config/test.json
@@ -0,0 +1 @@
[ "this is my test config" ]
20 changes: 20 additions & 0 deletions knowledge_base/job_with_custom_sys_path/databricks.yml
@@ -0,0 +1,20 @@
bundle:
  name: job_with_custom_sys_path

include:
  - ./resources/*.job.yml

workspace:
  host: https://e2-dogfood.staging.cloud.databricks.com

targets:
  dev:
    default: true
    mode: development

  prod:
    mode: production

    # Production mode requires explicit configuration of the identity to use to run the job.
    run_as:
      user_name: "${workspace.current_user.userName}"
@@ -0,0 +1,5 @@
from .loader import load_configuration

__all__ = [
    "load_configuration",
]
@@ -0,0 +1,15 @@
import json
from os import path
from typing import Any

from my_custom_library import parameters


def load_configuration() -> Any:
    """
    Load the configuration file for the bundle target.
    """
    config_file_path = path.join(
        parameters.bundle_file_path(), "config", f"{parameters.bundle_target()}.json"
    )
    with open(config_file_path, "r") as file:
        return json.load(file)
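
The lookup that `load_configuration` performs can be reproduced in isolation. The sketch below writes a fake `config/test.json` under a temporary bundle root and loads it the same way, with the two parameter lookups inlined; all paths here are temporary stand-ins:

```python
import json
import tempfile
from os import path
from pathlib import Path

# Fake bundle root with a config/ directory, mirroring the example layout.
bundle_root = Path(tempfile.mkdtemp())
(bundle_root / "config").mkdir()
(bundle_root / "config" / "test.json").write_text('[ "this is my test config" ]')

# Same join-and-load logic as load_configuration, with bundle_file_path()
# and bundle_target() replaced by literal values.
config_file_path = path.join(bundle_root, "config", "test.json")
with open(config_file_path, "r") as file:
    configuration = json.load(file)

print(configuration)  # → ['this is my test config']
```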
@@ -0,0 +1,31 @@
from functools import cache


@cache
def bundle_file_path() -> str:
    """
    Return the bundle file path.

    This function expects a job parameter called "bundle_file_path" to be set.

    It is mocked during testing.

    The dbutils import is done inside the function so that it is skipped when running locally.
    """
    from databricks.sdk.runtime import dbutils

    return dbutils.widgets.get("bundle_file_path")


@cache
def bundle_target() -> str:
    """
    Return the bundle target.

    This function expects a job parameter called "bundle_target" to be set.

    It is mocked during testing.

    The dbutils import is done inside the function so that it is skipped when running locally.
    """
    from databricks.sdk.runtime import dbutils

    return dbutils.widgets.get("bundle_target")
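
Because both functions are decorated with `functools.cache`, the widget lookup happens at most once per process; later calls return the memoized value. A minimal local illustration (`read_parameter` and the call counter are invented for this sketch):

```python
from functools import cache

calls = 0


@cache
def read_parameter() -> str:
    """Stand-in for a dbutils.widgets.get(...) lookup."""
    global calls
    calls += 1
    return "dev"


# The body runs on the first call only; the second call hits the cache.
assert read_parameter() == "dev"
assert read_parameter() == "dev"
print(calls)  # → 1
```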
2 changes: 2 additions & 0 deletions knowledge_base/job_with_custom_sys_path/requirements.txt
@@ -0,0 +1,2 @@
databricks-sdk
pytest
@@ -0,0 +1,18 @@
resources:
  jobs:
    print_bundle_configuration:
      name: Example to demonstrate job parameterization

      parameters:
        - # The bundle deployment's root file path in the workspace.
          name: "bundle_file_path"
          default: "${workspace.file_path}"

        - # The bundle target name (e.g. "dev" or "prod").
          name: "bundle_target"
          default: "${bundle.target}"

      tasks:
        - task_key: print
          notebook_task:
            notebook_path: ../src/print.ipynb
49 changes: 49 additions & 0 deletions knowledge_base/job_with_custom_sys_path/src/print.ipynb
@@ -0,0 +1,49 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The cell below retrieves this bundle's deployment file path\n",
    "and adds it to the Python path."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "from databricks.sdk.runtime import dbutils\n",
Contributor comment:
Do we need to import dbutils from the SDK here? It seems like this notebook will only be run from the workspace.

    "bundle_file_path = dbutils.widgets.get(\"bundle_file_path\")\n",
    "sys.path.append(bundle_file_path)"
Contributor comment:
I'm curious if this solves the customer problem. But there's another approach here where you could support interactive execution as well: you could have a %run ./add-sys-path here. The add-sys-path would then be a notebook that does something like https://github.com/databricks/bundle-examples/blob/main/default_python/fixtures/.gitkeep#L11 to add its containing folder to the sys path.
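
For reference, the pattern the comment alludes to (an `add-sys-path` notebook that appends its own containing folder to `sys.path`) might look roughly like the following when reduced to plain Python. The helper name and the use of a file path are assumptions for this sketch; in a real notebook the folder would come from the notebook context rather than `__file__`:

```python
import os
import sys


def add_containing_folder(file: str) -> str:
    """Hypothetical helper: append the directory containing `file` to sys.path."""
    folder = os.path.dirname(os.path.abspath(file))
    if folder not in sys.path:
        sys.path.append(folder)
    return folder


# Example: make modules next to /tmp/example/notebook.py importable.
folder = add_containing_folder("/tmp/example/notebook.py")
print(folder)  # → /tmp/example
```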

   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from my_custom_library import load_configuration\n",
    "from pprint import pprint\n",
    "\n",
    "pprint(load_configuration())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.12.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
@@ -0,0 +1,34 @@
from os import path

import my_custom_library
import my_custom_library.parameters


def mock_bundle_file_path(monkeypatch):
    def mock():
        return path.join(path.dirname(__file__), "..")

    monkeypatch.setattr(
        my_custom_library.parameters,
        "bundle_file_path",
        mock,
    )


def mock_bundle_target(monkeypatch):
    def mock():
        return "test"

    monkeypatch.setattr(
        my_custom_library.parameters,
        "bundle_target",
        mock,
    )


def test_load_configuration(monkeypatch):
    mock_bundle_file_path(monkeypatch)
    mock_bundle_target(monkeypatch)

    configuration = my_custom_library.load_configuration()
    assert configuration == ["this is my test config"]