From 8c58d485b58e8cd3b364e435fea5ac1d4cabd0e0 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Wed, 18 Dec 2024 17:07:09 +0100 Subject: [PATCH 01/26] tmp: Documentation autogeneration script --- Makefile | 8 +- bundle/internal/docs/docs.go | 235 +++++ bundle/internal/docs/docs.md | 1912 ++++++++++++++++++++++++++++++++++ bundle/internal/docs/main.go | 183 ++++ go.mod | 5 + go.sum | 10 + libs/jsonschema/from_type.go | 4 + 7 files changed, 2355 insertions(+), 2 deletions(-) create mode 100644 bundle/internal/docs/docs.go create mode 100644 bundle/internal/docs/docs.md create mode 100644 bundle/internal/docs/main.go diff --git a/Makefile b/Makefile index f8e7834a50..c39e03a000 100644 --- a/Makefile +++ b/Makefile @@ -29,11 +29,15 @@ snapshot: vendor: @echo "✓ Filling vendor folder with library code ..." @go mod vendor - + schema: @echo "✓ Generating json-schema ..." @go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema.json +docs: + @echo "✓ Generating docs ..." + @go run ./bundle/internal/docs ./bundle/internal/schema ./bundle/internal/docs/docs.md + INTEGRATION = gotestsum --format github-actions --rerun-fails --jsonfile output.json --packages "./integration/..." -- -parallel 4 -timeout=2h integration: @@ -42,4 +46,4 @@ integration: integration-short: $(INTEGRATION) -short -.PHONY: lint lintcheck test testonly coverage build snapshot vendor schema integration integration-short +.PHONY: lint lintcheck test testonly coverage build snapshot vendor schema integration integration-short docs diff --git a/bundle/internal/docs/docs.go b/bundle/internal/docs/docs.go new file mode 100644 index 0000000000..23742a38d2 --- /dev/null +++ b/bundle/internal/docs/docs.go @@ -0,0 +1,235 @@ +package main + +import ( + "fmt" + "log" + "os" + "sort" + "strings" + + "github.com/databricks/cli/libs/jsonschema" + + md "github.com/nao1215/markdown" +) + +type rootNode struct { + Title string + Description string + Attributes []attributeNode + Example string + ObjectKeyAttributes []attributeNode + ArrayItemAttributes []attributeNode + TopLevel bool +} + +type attributeNode struct { + Title string + Type string + Description string +} + +type rootProp struct { + k string + v *jsonschema.Schema + topLevel bool +} + +const ( + AdditionalPropertiesMessage = "Each item has the following attributes:" + AdditionalPropertiesAttributeTitle = "" + AdditionalPropertiesAttributeDescription = "The definition of the item" +) + +func getNodes(s jsonschema.Schema, refs map[string]jsonschema.Schema, a annotationFile) []rootNode { + rootProps := []rootProp{} + for k, v := range s.Properties { + rootProps = append(rootProps, rootProp{k, v, true}) + } + nodes := make([]rootNode, 0, len(rootProps)) + + for i := 0; i < len(rootProps); i++ { + k := rootProps[i].k + v := rootProps[i].v + v = resolveRefs(v, refs) + node := rootNode{ + Title: k, + Description: getDescription(v), + TopLevel: rootProps[i].topLevel, + } + + node.Attributes = getAttributes(v.Properties, refs) + rootProps = append(rootProps, extractNodes(k, v.Properties, refs, a)...) + + additionalProps, ok := v.AdditionalProperties.(*jsonschema.Schema) + if ok { + objectKeyType := resolveRefs(additionalProps, refs) + node.ObjectKeyAttributes = getAttributes(objectKeyType.Properties, refs) + rootProps = append(rootProps, extractNodes(k, objectKeyType.Properties, refs, a)...) 
+		}
+
+		if v.Items != nil {
+			arrayItemType := resolveRefs(v.Items, refs)
+			node.ArrayItemAttributes = getAttributes(arrayItemType.Properties, refs)
+		}
+
+		nodes = append(nodes, node)
+	}
+
+	sort.Slice(nodes, func(i, j int) bool {
+		return nodes[i].Title < nodes[j].Title
+	})
+	return nodes
+}
+
+func buildMarkdown(nodes []rootNode, outputFile string) error {
+	f, err := os.Create(outputFile)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	m := md.NewMarkdown(f)
+	for _, node := range nodes {
+		if node.TopLevel {
+			m = m.H2(node.Title)
+		} else {
+			m = m.H3(node.Title)
+		}
+		m = m.PlainText(node.Description)
+
+		if len(node.ObjectKeyAttributes) > 0 {
+			m = buildAttributeTable(m, []attributeNode{
+				{Title: AdditionalPropertiesAttributeTitle, Type: "Map", Description: AdditionalPropertiesAttributeDescription},
+			})
+			m = m.PlainText(AdditionalPropertiesMessage)
+			m = buildAttributeTable(m, node.ObjectKeyAttributes)
+
+		} else if len(node.ArrayItemAttributes) > 0 {
+			m = m.PlainText(fmt.Sprintf("Each item of `%s` has the following attributes:", node.Title))
+			m = buildAttributeTable(m, node.ArrayItemAttributes)
+		} else if len(node.Attributes) > 0 {
+			m = m.H4("Attributes")
+			m = buildAttributeTable(m, node.Attributes)
+		}
+	}
+
+	err = m.Build()
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func buildAttributeTable(m *md.Markdown, attributes []attributeNode) *md.Markdown {
+	rows := [][]string{}
+	for _, n := range attributes {
+		rows = append(rows, []string{fmt.Sprintf("`%s`", n.Title), n.Type, formatDescription(n.Description)})
+	}
+	m = m.CustomTable(md.TableSet{
+		Header: []string{"Key", "Type", "Description"},
+		Rows:   rows,
+	}, md.TableOptions{AutoWrapText: false, AutoFormatHeaders: false})
+
+	return m
+}
+
+func formatDescription(s string) string {
+	if s == "" {
+		return "-"
+	}
+	return strings.ReplaceAll(s, "\n", " ")
+}
+
+// Build a custom list-table directive which we use on the Databricks website
+func buildCustomAttributeTable(m *md.Markdown, attributes []attributeNode) *md.Markdown {
+	m = m.PlainText(".. 
list-table::") + m = m.PlainText(" :header-rows: 1") + + m = m.PlainText(" * - Key") + m = m.PlainText(" - Type") + m = m.PlainText(" - Description") + + for _, a := range attributes { + m = m.PlainText(" * - " + a.Title) + m = m.PlainText(" - " + a.Type) + m = m.PlainText(" - " + a.Description) + } + return m +} + +func getAttributes(props map[string]*jsonschema.Schema, refs map[string]jsonschema.Schema) []attributeNode { + typesMapping := map[string]string{ + "string": "String", + "integer": "Integer", + "boolean": "Boolean", + "array": "Sequence", + "object": "Map", + } + + attributes := []attributeNode{} + for k, v := range props { + v = resolveRefs(v, refs) + typeString := typesMapping[string(v.Type)] + if typeString == "" { + typeString = "Any" + } + attributes = append(attributes, attributeNode{ + Title: k, + Type: typeString, + Description: getDescription(v), + }) + } + sort.Slice(attributes, func(i, j int) bool { + return attributes[i].Title < attributes[j].Title + }) + return attributes +} + +func getDescription(s *jsonschema.Schema) string { + if s.MarkdownDescription != "" { + return s.MarkdownDescription + } + return s.Description +} + +func resolveRefs(s *jsonschema.Schema, schemas map[string]jsonschema.Schema) *jsonschema.Schema { + node := s + + description := s.Description + markdownDescription := s.MarkdownDescription + + for node.Reference != nil { + ref := strings.TrimPrefix(*node.Reference, "#/$defs/") + newNode, ok := schemas[ref] + if !ok { + log.Printf("schema %s not found", ref) + } + + if description == "" { + description = newNode.Description + } + if markdownDescription == "" { + markdownDescription = newNode.MarkdownDescription + } + + node = &newNode + } + + node.Description = description + node.MarkdownDescription = markdownDescription + + return node +} + +func extractNodes(prefix string, props map[string]*jsonschema.Schema, refs map[string]jsonschema.Schema, a annotationFile) []rootProp { + nodes := []rootProp{} + for k, v := range props { + v = resolveRefs(v, refs) + if v.Type == "object" { + nodes = append(nodes, rootProp{prefix + "." + k, v, false}) + } + v.MarkdownDescription = "" + } + return nodes +} diff --git a/bundle/internal/docs/docs.md b/bundle/internal/docs/docs.md new file mode 100644 index 0000000000..006b1727ed --- /dev/null +++ b/bundle/internal/docs/docs.md @@ -0,0 +1,1912 @@ +## artifacts +Defines the attributes to build an artifact +| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|--------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `build` | String | An optional set of non-default build commands that you want to run locally before deployment. For Python wheel builds, the Databricks CLI assumes that it can find a local install of the Python wheel package to run builds, and it runs the command python setup.py bdist_wheel by default during each bundle deployment. To specify multiple build commands, separate each command with double-ampersand (&&) characters. | +| `executable` | String | The executable type. 
| +| `files` | Sequence | The source files for the artifact, defined as an [_](#artifact_file). | +| `path` | String | The location where the built artifact will be saved. | +| `type` | String | The type of the artifact. Valid values are `wheel` or `jar` | + +## bundle +The attributes of the bundle. See [_](/dev-tools/bundles/settings.md#bundle) +#### Attributes +| Key | Type | Description | +|--------------------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `cluster_id` | String | The ID of a cluster to use to run the bundle. See [_](/dev-tools/bundles/settings.md#cluster_id). | +| `compute_id` | String | - | +| `databricks_cli_version` | String | The Databricks CLI version to use for the bundle. See [_](/dev-tools/bundles/settings.md#databricks_cli_version). | +| `deployment` | Map | The definition of the bundle deployment. For supported attributes, see [_](#deployment) and [_](/dev-tools/bundles/deployment-modes.md). | +| `git` | Map | The Git version control details that are associated with your bundle. For supported attributes, see [_](#git) and [_](/dev-tools/bundles/settings.md#git). | +| `name` | String | The name of the bundle. | +| `uuid` | String | - | + +### bundle.deployment +The definition of the bundle deployment +#### Attributes +| Key | Type | Description | +|-----------------------|---------|---------------------------------------------------------------------------------------------------------| +| `fail_on_active_runs` | Boolean | Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. | +| `lock` | Map | The deployment lock attributes. See [_](#lock). | + +### bundle.deployment.lock +The deployment lock attributes. +#### Attributes +| Key | Type | Description | +|-----------|---------|----------------------------------------------| +| `enabled` | Boolean | Whether this lock is enabled. | +| `force` | Boolean | Whether to force this lock if it is enabled. | + +### bundle.git +The Git version control details that are associated with your bundle. +#### Attributes +| Key | Type | Description | +|--------------|--------|--------------------------------------------------------------------------------| +| `branch` | String | The Git branch name. See [_](/dev-tools/bundles/settings.md#git). | +| `origin_url` | String | The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git). | + +## experimental +Defines attributes for experimental features. +#### Attributes +| Key | Type | Description | +|------------------------|---------|-------------------------------------------| +| `pydabs` | Map | The PyDABs configuration. | +| `python_wheel_wrapper` | Boolean | Whether to use a Python wheel wrapper | +| `scripts` | Map | The commands to run | +| `use_legacy_run_as` | Boolean | Whether to use the legacy run_as behavior | + +### experimental.pydabs +The PyDABs configuration. 
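For illustration only (this example is hand-written, not produced by the generator; the module name and virtual environment path are placeholders), a `pydabs` block in `databricks.yml` might look like this:

```yaml
experimental:
  pydabs:
    enabled: true
    venv_path: .venv              # placeholder: path to the Python virtual environment
    import:
      - my_project.resources      # placeholder: module that defines resources and mutators
```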
+#### Attributes +| Key | Type | Description | +|-------------|----------|-------------------------------------------------------------------------------------| +| `enabled` | Boolean | Whether or not PyDABs (Private Preview) is enabled | +| `import` | Sequence | The PyDABs project to import to discover resources, resource generator and mutators | +| `venv_path` | String | The Python virtual environment path | + +### experimental.scripts +The commands to run +## include +Specifies a list of path globs that contain configuration files to include within the bundle. See [_](/dev-tools/bundles/settings.md#include) +## permissions +Defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle. See [_](/dev-tools/bundles/settings.md#permissions) and [_](/dev-tools/bundles/permissions.md). +Each item of `permissions` has the following attributes: +| Key | Type | Description | +|--------------------------|--------|----------------------------------------------------------------------------------------| +| `group_name` | String | The name of the group that has the permission set in level. | +| `level` | String | The allowed permission for user, group, service principal defined for this permission. | +| `service_principal_name` | String | The name of the service principal that has the permission set in level. | +| `user_name` | String | The name of the user that has the permission set in level. | + +## presets +Defines bundle deployment presets. See [_](/dev-tools/bundles/deployment-modes.md#presets). +#### Attributes +| Key | Type | Description | +|----------------------------|---------|-------------------------------------------------------------------------------------------------| +| `jobs_max_concurrent_runs` | Integer | The maximum concurrent runs for a job. | +| `name_prefix` | String | The prefix for job runs of the bundle. | +| `pipelines_development` | Boolean | Whether pipeline deployments should be locked in development mode. | +| `source_linked_deployment` | Boolean | Whether to link the deployment to the bundle source. | +| `tags` | Map | The tags for the bundle deployment. | +| `trigger_pause_status` | String | A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED. | + +### presets.tags +The tags for the bundle deployment. +## resources +Specifies information about the Databricks resources used by the bundle. See [_](/dev-tools/bundles/resources.md). +#### Attributes +| Key | Type | Description | +|---------------------------|------|------------------------------------------------------------------------------------------------------------------------| +| `clusters` | Map | The cluster definitions for the bundle. See [_](/dev-tools/bundles/resources.md#cluster) | +| `dashboards` | Map | The dashboard definitions for the bundle. See [_](/dev-tools/bundles/resources.md#dashboard) | +| `experiments` | Map | The experiment definitions for the bundle. See [_](/dev-tools/bundles/resources.md#experiment) | +| `jobs` | Map | The job definitions for the bundle. See [_](/dev-tools/bundles/resources.md#job) | +| `model_serving_endpoints` | Map | The model serving endpoint definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model_serving_endpoint) | +| `models` | Map | The model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model) | +| `pipelines` | Map | The pipeline definitions for the bundle. 
See [_](/dev-tools/bundles/resources.md#pipeline) | +| `quality_monitors` | Map | The quality monitor definitions for the bundle. See [_](/dev-tools/bundles/resources.md#quality_monitor) | +| `registered_models` | Map | The registered model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#registered_model) | +| `schemas` | Map | The schema definitions for the bundle. See [_](/dev-tools/bundles/resources.md#schema) | +| `volumes` | Map | - | + +### resources.clusters +The cluster definitions for the bundle. +| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|--------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `apply_policy_default_values` | Boolean | When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. | +| `autoscale` | Map | Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. | +| `autotermination_minutes` | Integer | Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. | +| `aws_attributes` | Map | Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. | +| `azure_attributes` | Map | Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. | +| `cluster_log_conf` | Map | The configuration for delivering spark logs to a long-term storage destination. Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. | +| `cluster_name` | String | Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. | +| `custom_tags` | Map | Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. 
Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags | +| `data_security_mode` | String | - | +| `docker_image` | Map | - | +| `driver_instance_pool_id` | String | The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. | +| `driver_node_type_id` | String | The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. | +| `enable_elastic_disk` | Boolean | Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to the User Guide for more details. | +| `enable_local_disk_encryption` | Boolean | Whether to enable LUKS on cluster VMs' local disks | +| `gcp_attributes` | Map | Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. | +| `init_scripts` | Sequence | The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. | +| `instance_pool_id` | String | The optional ID of the instance pool to which the cluster belongs. | +| `node_type_id` | String | This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. | +| `num_workers` | Integer | Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. | +| `permissions` | Sequence | - | +| `policy_id` | String | The ID of the cluster policy used to create the cluster if applicable. | +| `runtime_engine` | String | - | +| `single_user_name` | String | Single user name if data_security_mode is `SINGLE_USER` | +| `spark_conf` | Map | An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. | +| `spark_env_vars` | Map | An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. 
This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` | +| `spark_version` | String | The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. | +| `ssh_public_keys` | Sequence | SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. | +| `workload_type` | Map | - | + +### resources.clusters.autoscale +Parameters needed in order to automatically scale clusters up and down based on load. +Note: autoscaling works best with DB runtime versions 3.0 or later. +#### Attributes +| Key | Type | Description | +|---------------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `max_workers` | Integer | The maximum number of workers to which the cluster can scale up when overloaded. Note that `max_workers` must be strictly greater than `min_workers`. | +| `min_workers` | Integer | The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. | + +### resources.clusters.aws_attributes +Attributes related to clusters running on Amazon Web Services. +If not specified at cluster creation, a set of default values will be used. +#### Attributes +| Key | Type | Description | +|--------------------------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `availability` | String | - | +| `ebs_volume_count` | Integer | The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogenously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. 
Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. | +| `ebs_volume_iops` | Integer | If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. | +| `ebs_volume_size` | Integer | The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. | +| `ebs_volume_throughput` | Integer | If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. | +| `ebs_volume_type` | String | - | +| `first_on_demand` | Integer | The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. | +| `instance_profile_arn` | String | Nodes for this cluster will only be placed on AWS instances with this instance profile. If ommitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans. If this field is ommitted, we will pull in the default from the conf if it exists. | +| `spot_bid_price_percent` | Integer | The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. The default value and documentation here should be kept consistent with CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. | +| `zone_id` | String | Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. 
| + +### resources.clusters.azure_attributes +Attributes related to clusters running on Microsoft Azure. +If not specified at cluster creation, a set of default values will be used. +#### Attributes +| Key | Type | Description | +|----------------------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `availability` | String | - | +| `first_on_demand` | Integer | The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. | +| `log_analytics_info` | Map | Defines values necessary to configure and run Azure Log Analytics agent | +| `spot_bid_max_price` | Any | The max bid price to be used for Azure spot instances. The Max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should > 0 or -1. | + +### resources.clusters.azure_attributes.log_analytics_info +Defines values necessary to configure and run Azure Log Analytics agent +#### Attributes +| Key | Type | Description | +|------------------------------|--------|-----------------------| +| `log_analytics_primary_key` | String | | +| `log_analytics_workspace_id` | String | | + +### resources.clusters.cluster_log_conf +The configuration for delivering spark logs to a long-term storage destination. +Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified +for one cluster. If the conf is given, the logs will be delivered to the destination every +`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while +the destination of executor logs is `$destination/$clusterId/executor`. +#### Attributes +| Key | Type | Description | +|--------|------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `dbfs` | Map | destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` | +| `s3` | Map | destination and either the region or endpoint need to be provided. e.g. 
`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. | + +### resources.clusters.cluster_log_conf.dbfs +destination needs to be provided. e.g. +`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` +#### Attributes +| Key | Type | Description | +|---------------|--------|----------------------------------------| +| `destination` | String | dbfs destination, e.g. `dbfs:/my/path` | + +### resources.clusters.cluster_log_conf.s3 +destination and either the region or endpoint need to be provided. e.g. +`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` +Cluster iam role is used to access s3, please make sure the cluster iam role in +`instance_profile_arn` has permission to write data to the s3 destination. +#### Attributes +| Key | Type | Description | +|---------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `canned_acl` | String | (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. | +| `destination` | String | S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. | +| `enable_encryption` | Boolean | (Optional) Flag to enable server side encryption, `false` by default. | +| `encryption_type` | String | (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. | +| `endpoint` | String | S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. | +| `kms_key` | String | (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. | +| `region` | String | S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. | + +### resources.clusters.custom_tags +Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS +instances and EBS volumes) with these tags in addition to `default_tags`. 
Notes: + +- Currently, Databricks allows at most 45 custom tags + +- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags +### resources.clusters.docker_image + +#### Attributes +| Key | Type | Description | +|--------------|--------|--------------------------| +| `basic_auth` | Map | - | +| `url` | String | URL of the docker image. | + +### resources.clusters.docker_image.basic_auth + +#### Attributes +| Key | Type | Description | +|------------|--------|----------------------| +| `password` | String | Password of the user | +| `username` | String | Name of the user | + +### resources.clusters.gcp_attributes +Attributes related to clusters running on Google Cloud Platform. +If not specified at cluster creation, a set of default values will be used. +#### Attributes +| Key | Type | Description | +|-----------------------------|---------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `availability` | String | - | +| `boot_disk_size` | Integer | boot disk size in GB | +| `google_service_account` | String | If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). The google service account must have previously been added to the Databricks environment by an account administrator. | +| `local_ssd_count` | Integer | If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. | +| `use_preemptible_executors` | Boolean | This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default). Note: Soon to be deprecated, use the availability field instead. | +| `zone_id` | String | Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default] - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. | + +### resources.clusters.spark_conf +An object containing a set of optional, user-specified Spark configuration key-value pairs. +Users can also pass in a string of extra JVM options to the driver and the executors via +`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. + +### resources.clusters.spark_env_vars +An object containing a set of optional, user-specified environment variable key-value pairs. +Please note that key-value pair of the form (X,Y) will be exported as is (i.e., +`export X='Y'`) while launching the driver and workers. + +In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending +them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. 
This ensures that all +default databricks managed environmental variables are included as well. + +Example Spark environment variables: +`{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or +`{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` +### resources.clusters.workload_type + +#### Attributes +| Key | Type | Description | +|-----------|------|-------------------------------------------------------------------------| +| `clients` | Map | defined what type of clients can use the cluster. E.g. Notebooks, Jobs | + +### resources.clusters.workload_type.clients + defined what type of clients can use the cluster. E.g. Notebooks, Jobs +#### Attributes +| Key | Type | Description | +|-------------|---------|------------------------------------------------------------| +| `jobs` | Boolean | With jobs set, the cluster can be used for jobs | +| `notebooks` | Boolean | With notebooks set, this cluster can be used for notebooks | + +### resources.dashboards +The dashboard definitions for the bundle. +| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|------------------------|----------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `create_time` | String | The timestamp of when the dashboard was created. | +| `dashboard_id` | String | UUID identifying the dashboard. | +| `display_name` | String | The display name of the dashboard. | +| `embed_credentials` | Boolean | - | +| `etag` | String | The etag for the dashboard. Can be optionally provided on updates to ensure that the dashboard has not been modified since the last read. This field is excluded in List Dashboards responses. | +| `file_path` | String | - | +| `lifecycle_state` | String | The state of the dashboard resource. Used for tracking trashed status. | +| `parent_path` | String | The workspace path of the folder containing the dashboard. Includes leading slash and no trailing slash. This field is excluded in List Dashboards responses. | +| `path` | String | The workspace path of the dashboard asset, including the file name. Exported dashboards always have the file extension `.lvdash.json`. This field is excluded in List Dashboards responses. | +| `permissions` | Sequence | - | +| `serialized_dashboard` | Any | The contents of the dashboard in serialized string form. This field is excluded in List Dashboards responses. Use the [get dashboard API](https://docs.databricks.com/api/workspace/lakeview/get) to retrieve an example response, which includes the `serialized_dashboard` field. This field provides the structure of the JSON string that represents the dashboard's layout and components. | +| `update_time` | String | The timestamp of when the dashboard was last updated by the user. This field is excluded in List Dashboards responses. | +| `warehouse_id` | String | The warehouse ID used to run the dashboard. | + +### resources.experiments +The experiment definitions for the bundle. 
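For illustration only (hand-written example; the resource key, user name, and artifact location are placeholders), an experiment definition in `databricks.yml` might look like this:

```yaml
resources:
  experiments:
    my_experiment:                                          # placeholder resource key
      name: /Users/someone@example.com/my-experiment        # workspace path of the experiment
      artifact_location: dbfs:/tmp/my-experiment-artifacts  # placeholder storage location
```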
+| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|---------------------|----------|------------------------------------------------------------------------------------------------------------------| +| `artifact_location` | String | Location where artifacts for the experiment are stored. | +| `creation_time` | Integer | Creation time | +| `experiment_id` | String | Unique identifier for the experiment. | +| `last_update_time` | Integer | Last update time | +| `lifecycle_stage` | String | Current life cycle stage of the experiment: "active" or "deleted". Deleted experiments are not returned by APIs. | +| `name` | String | Human readable name that identifies the experiment. | +| `permissions` | Sequence | - | +| `tags` | Sequence | Tags: Additional metadata key-value pairs. | + +### resources.jobs +The job definitions for the bundle. +| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|-------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `budget_policy_id` | String | The id of the user specified budget policy to use for this job. If not specified, a default budget policy may be applied when creating or modifying the job. See `effective_budget_policy_id` for the budget policy used by this workload. | +| `continuous` | Map | An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. | +| `description` | String | An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding. | +| `email_notifications` | Map | An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. | +| `environments` | Sequence | A list of task execution environment specifications that can be referenced by serverless tasks of this job. An environment is required to be present for serverless tasks. For serverless notebook tasks, the environment is accessible in the notebook environment panel. For other serverless tasks, the task environment is required to be specified using environment_key in the task settings. | +| `git_source` | Map | An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. If `git_source` is set, these tasks retrieve the file from the remote repository by default. 
However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. | +| `health` | Map | - | +| `job_clusters` | Sequence | A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. | +| `max_concurrent_runs` | Integer | An optional maximum allowed number of concurrent runs of the job. Set this value if you want to be able to execute multiple runs of the same job concurrently. This is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters. This setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs. However, from then on, new runs are skipped unless there are fewer than 3 active runs. This value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped. | +| `name` | String | An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding. | +| `notification_settings` | Map | Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. | +| `parameters` | Sequence | Job-level parameter definitions | +| `permissions` | Sequence | - | +| `queue` | Map | The queue settings of the job. | +| `run_as` | Map | - | +| `schedule` | Map | An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. | +| `tags` | Map | A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. | +| `tasks` | Sequence | A list of task specifications to be executed by this job. | +| `timeout_seconds` | Integer | An optional timeout applied to each run of this job. A value of `0` means no timeout. | +| `trigger` | Map | A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. | +| `webhook_notifications` | Map | A collection of system notification IDs to notify when runs of this job begin or complete. | + +### resources.jobs.continuous +An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. +#### Attributes +| Key | Type | Description | +|----------------|--------|----------------------------------------------------------------------------------------------| +| `pause_status` | String | Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED. | + +### resources.jobs.email_notifications +An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. 
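For illustration only (hand-written example; the job key and email addresses are placeholders), job-level email notifications might be configured like this:

```yaml
resources:
  jobs:
    my_job:                             # placeholder job key
      name: my-job
      email_notifications:
        on_failure:
          - alerts@example.com
        on_success:
          - alerts@example.com
        no_alert_for_skipped_runs: true
```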
+#### Attributes +| Key | Type | Description | +|------------------------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `no_alert_for_skipped_runs` | Boolean | If true, do not send email to recipients specified in `on_failure` if the run is skipped. This field is `deprecated`. Please use the `notification_settings.no_alert_for_skipped_runs` field. | +| `on_duration_warning_threshold_exceeded` | Sequence | A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. | +| `on_failure` | Sequence | A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. | +| `on_start` | Sequence | A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. | +| `on_streaming_backlog_exceeded` | Sequence | A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. | +| `on_success` | Sequence | A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. | + +### resources.jobs.git_source +An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. + +If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. + +Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. +#### Attributes +| Key | Type | Description | +|----------------|--------|--------------------------------------------------------------------------------------------------------------------------------------| +| `git_branch` | String | Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit. | +| `git_commit` | String | Commit to be checked out and used by this job. 
This field cannot be specified in conjunction with git_branch or git_tag. | +| `git_provider` | String | Unique identifier of the service used to host the Git repository. The value is case insensitive. | +| `git_tag` | String | Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit. | +| `git_url` | String | URL of the repository to be cloned by this job. | + +### resources.jobs.health + +#### Attributes +| Key | Type | Description | +|---------|----------|-------------| +| `rules` | Sequence | - | + +### resources.jobs.notification_settings +Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. +#### Attributes +| Key | Type | Description | +|------------------------------|---------|----------------------------------------------------------------------------------------------------| +| `no_alert_for_canceled_runs` | Boolean | If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. | +| `no_alert_for_skipped_runs` | Boolean | If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. | + +### resources.jobs.queue +The queue settings of the job. +#### Attributes +| Key | Type | Description | +|-----------|---------|-----------------------------------------------------------------| +| `enabled` | Boolean | If true, enable queueing for the job. This is a required field. | + +### resources.jobs.run_as + +#### Attributes +| Key | Type | Description | +|--------------------------|--------|--------------------------------------------------------------------------------------------------------------| +| `service_principal_name` | String | Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. | +| `user_name` | String | The email of an active workspace user. Non-admin users can only set this field to their own email. | + +### resources.jobs.schedule +An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. +#### Attributes +| Key | Type | Description | +|--------------------------|--------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `pause_status` | String | Indicate whether this schedule is paused or not. | +| `quartz_cron_expression` | String | A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required. | +| `timezone_id` | String | A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required. | + +### resources.jobs.tags +A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. +### resources.jobs.trigger +A configuration to trigger a run when certain conditions are met. 
The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. +#### Attributes +| Key | Type | Description | +|----------------|--------|-------------------------------------------------------------------------| +| `file_arrival` | Map | File arrival trigger settings. | +| `pause_status` | String | Whether this trigger is paused or not. | +| `periodic` | Map | Periodic trigger settings. | +| `table` | Map | Old table trigger settings name. Deprecated in favor of `table_update`. | +| `table_update` | Map | - | + +### resources.jobs.trigger.file_arrival +File arrival trigger settings. +#### Attributes +| Key | Type | Description | +|-------------------------------------|---------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `min_time_between_triggers_seconds` | Integer | If set, the trigger starts a run only after the specified amount of time passed since the last time the trigger fired. The minimum allowed value is 60 seconds | +| `url` | String | URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location. | +| `wait_after_last_change_seconds` | Integer | If set, the trigger starts a run only after no file activity has occurred for the specified amount of time. This makes it possible to wait for a batch of incoming files to arrive before triggering a run. The minimum allowed value is 60 seconds. | + +### resources.jobs.trigger.periodic +Periodic trigger settings. +#### Attributes +| Key | Type | Description | +|------------|---------|-----------------------------------------------| +| `interval` | Integer | The interval at which the trigger should run. | +| `unit` | String | The unit of time for the interval. | + +### resources.jobs.trigger.table +Old table trigger settings name. Deprecated in favor of `table_update`. +#### Attributes +| Key | Type | Description | +|-------------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `condition` | String | The table(s) condition based on which to trigger a job run. | +| `min_time_between_triggers_seconds` | Integer | If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. | +| `table_names` | Sequence | A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. | +| `wait_after_last_change_seconds` | Integer | If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. The minimum allowed value is 60 seconds. 
| + +### resources.jobs.trigger.table_update + +#### Attributes +| Key | Type | Description | +|-------------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `condition` | String | The table(s) condition based on which to trigger a job run. | +| `min_time_between_triggers_seconds` | Integer | If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. | +| `table_names` | Sequence | A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. | +| `wait_after_last_change_seconds` | Integer | If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. The minimum allowed value is 60 seconds. | + +### resources.jobs.webhook_notifications +A collection of system notification IDs to notify when runs of this job begin or complete. +#### Attributes +| Key | Type | Description | +|------------------------------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `on_duration_warning_threshold_exceeded` | Sequence | An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. | +| `on_failure` | Sequence | An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. | +| `on_start` | Sequence | An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. | +| `on_streaming_backlog_exceeded` | Sequence | An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. | +| `on_success` | Sequence | An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. | + +### resources.model_serving_endpoints +The model serving endpoint definitions for the bundle. 
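
For example, a minimal sketch of a serving endpoint definition might look like the following (the resource key `my_endpoint` and all values shown are hypothetical):

```yaml
resources:
  model_serving_endpoints:
    my_endpoint:               # hypothetical resource key
      name: my-endpoint        # must be unique across the workspace
      route_optimized: false
      ai_gateway:
        usage_tracking_config:
          enabled: true        # record operational usage in system tables
```
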
+| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|-------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `ai_gateway` | Map | The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are supported as of now. | +| `config` | Map | The core config of the serving endpoint. | +| `name` | String | The name of the serving endpoint. This field is required and must be unique across a Databricks workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. | +| `permissions` | Sequence | - | +| `rate_limits` | Sequence | Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI Gateway to manage rate limits. | +| `route_optimized` | Boolean | Enable route optimization for the serving endpoint. | +| `tags` | Sequence | Tags to be attached to the serving endpoint and automatically propagated to billing logs. | + +### resources.model_serving_endpoints.ai_gateway +The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are supported as of now. +#### Attributes +| Key | Type | Description | +|--------------------------|----------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `guardrails` | Map | Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. | +| `inference_table_config` | Map | Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. | +| `rate_limits` | Sequence | Configuration for rate limits which can be set to limit endpoint traffic. | +| `usage_tracking_config` | Map | Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. | + +### resources.model_serving_endpoints.ai_gateway.guardrails +Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. +#### Attributes +| Key | Type | Description | +|----------|------|---------------------------------------------| +| `input` | Map | Configuration for input guardrail filters. | +| `output` | Map | Configuration for output guardrail filters. | + +### resources.model_serving_endpoints.ai_gateway.guardrails.input +Configuration for input guardrail filters. +#### Attributes +| Key | Type | Description | +|--------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------| +| `invalid_keywords` | Sequence | List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. | +| `pii` | Map | Configuration for guardrail PII filter. | +| `safety` | Boolean | Indicates whether the safety filter is enabled. | +| `valid_topics` | Sequence | The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. 
| + +### resources.model_serving_endpoints.ai_gateway.guardrails.input.pii +Configuration for guardrail PII filter. +#### Attributes +| Key | Type | Description | +|------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `behavior` | String | Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. | + +### resources.model_serving_endpoints.ai_gateway.guardrails.output +Configuration for output guardrail filters. +#### Attributes +| Key | Type | Description | +|--------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------| +| `invalid_keywords` | Sequence | List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. | +| `pii` | Map | Configuration for guardrail PII filter. | +| `safety` | Boolean | Indicates whether the safety filter is enabled. | +| `valid_topics` | Sequence | The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. | + +### resources.model_serving_endpoints.ai_gateway.guardrails.output.pii +Configuration for guardrail PII filter. +#### Attributes +| Key | Type | Description | +|------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `behavior` | String | Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. | + +### resources.model_serving_endpoints.ai_gateway.inference_table_config +Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. +#### Attributes +| Key | Type | Description | +|---------------------|---------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `catalog_name` | String | The name of the catalog in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the catalog name. | +| `enabled` | Boolean | Indicates whether the inference table is enabled. 
| +| `schema_name` | String | The name of the schema in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the schema name. | +| `table_name_prefix` | String | The prefix of the table in Unity Catalog. NOTE: On update, you have to disable inference table first in order to change the prefix name. | + +### resources.model_serving_endpoints.ai_gateway.usage_tracking_config +Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. +#### Attributes +| Key | Type | Description | +|-----------|---------|-----------------------------------| +| `enabled` | Boolean | Whether to enable usage tracking. | + +### resources.model_serving_endpoints.config +The core config of the serving endpoint. +#### Attributes +| Key | Type | Description | +|-----------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------| +| `auto_capture_config` | Map | Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. | +| `served_entities` | Sequence | A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served entities. | +| `served_models` | Sequence | (Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A serving endpoint can have up to 15 served models. | +| `traffic_config` | Map | The traffic config defining how invocations to the serving endpoint should be routed. | + +### resources.model_serving_endpoints.config.auto_capture_config +Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. +#### Attributes +| Key | Type | Description | +|---------------------|---------|------------------------------------------------------------------------------------------------------------------------------------------| +| `catalog_name` | String | The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled. | +| `enabled` | Boolean | Indicates whether the inference table is enabled. | +| `schema_name` | String | The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if the inference table is already enabled. | +| `table_name_prefix` | String | The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled. | + +### resources.model_serving_endpoints.config.traffic_config +The traffic config defining how invocations to the serving endpoint should be routed. +#### Attributes +| Key | Type | Description | +|----------|----------|---------------------------------------------------------------| +| `routes` | Sequence | The list of routes that define traffic to each served entity. | + +### resources.models +The model definitions for the bundle. +| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|--------------------------|----------|-------------------------------------------------------------------------------------------------------| +| `creation_timestamp` | Integer | Timestamp recorded when this `registered_model` was created. 
| +| `description` | String | Description of this `registered_model`. | +| `last_updated_timestamp` | Integer | Timestamp recorded when metadata for this `registered_model` was last updated. | +| `latest_versions` | Sequence | Collection of latest model versions for each stage. Only contains models with current `READY` status. | +| `name` | String | Unique name for the model. | +| `permissions` | Sequence | - | +| `tags` | Sequence | Tags: Additional metadata key-value pairs for this `registered_model`. | +| `user_id` | String | User that created this `registered_model` | + +### resources.pipelines +The pipeline definitions for the bundle. +| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `budget_policy_id` | String | Budget policy of this pipeline. | +| `catalog` | String | A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. | +| `channel` | String | DLT Release Channel that specifies which version to use. | +| `clusters` | Sequence | Cluster settings for this pipeline deployment. | +| `configuration` | Map | String-String configuration for this pipeline execution. | +| `continuous` | Boolean | Whether the pipeline is continuous or triggered. This replaces `trigger`. | +| `deployment` | Map | Deployment type of this pipeline. | +| `development` | Boolean | Whether the pipeline is in Development mode. Defaults to false. | +| `edition` | String | Pipeline product edition. | +| `filters` | Map | Filters on which Pipeline packages to include in the deployed graph. | +| `gateway_definition` | Map | The definition of a gateway pipeline to support change data capture. | +| `id` | String | Unique identifier for this pipeline. | +| `ingestion_definition` | Map | The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings. | +| `libraries` | Sequence | Libraries or code needed by this deployment. | +| `name` | String | Friendly identifier for this pipeline. | +| `notifications` | Sequence | List of notification settings for this pipeline. | +| `permissions` | Sequence | - | +| `photon` | Boolean | Whether Photon is enabled for this pipeline. | +| `restart_window` | Map | Restart window of this pipeline. | +| `schema` | String | The default schema (database) where tables are read from or published to. The presence of this field implies that the pipeline is in direct publishing mode. | +| `serverless` | Boolean | Whether serverless compute is enabled for this pipeline. | +| `storage` | String | DBFS root directory for storing checkpoints and tables. | +| `target` | String | Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. | +| `trigger` | Map | Which pipeline trigger to use. 
Deprecated: Use `continuous` instead. |

### resources.pipelines.configuration
String-String configuration for this pipeline execution.
### resources.pipelines.deployment
Deployment type of this pipeline.
#### Attributes
| Key | Type | Description |
|----------------------|--------|-----------------------------------------------------------------|
| `kind` | String | The deployment method that manages the pipeline. |
| `metadata_file_path` | String | The path to the file containing metadata about the deployment. |

### resources.pipelines.filters
Filters on which Pipeline packages to include in the deployed graph.
#### Attributes
| Key | Type | Description |
|-----------|----------|-------------------|
| `exclude` | Sequence | Paths to exclude. |
| `include` | Sequence | Paths to include. |

### resources.pipelines.gateway_definition
The definition of a gateway pipeline to support change data capture.
#### Attributes
| Key | Type | Description |
|---------------------------|--------|----------------------------------------------------------------------------------------------------------------------------------------------------|
| `connection_id` | String | [Deprecated, use connection_name instead] Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. |
| `connection_name` | String | Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. |
| `gateway_storage_catalog` | String | Required, Immutable. The name of the catalog for the gateway pipeline's storage location. |
| `gateway_storage_name` | String | Optional. The Unity Catalog-compatible name for the gateway storage location. This is the destination to use for the data that is extracted by the gateway. The Delta Live Tables system automatically creates the storage location under the catalog and schema. |
| `gateway_storage_schema` | String | Required, Immutable. The name of the schema for the gateway pipeline's storage location. |

### resources.pipelines.ingestion_definition
The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings.
#### Attributes
| Key | Type | Description |
|------------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `connection_name` | String | Immutable. The Unity Catalog connection that this ingestion pipeline uses to communicate with the source. This is used with connectors for applications like Salesforce, Workday, and so on. |
| `ingestion_gateway_id` | String | Immutable. Identifier for the gateway that is used by this ingestion pipeline to communicate with the source database. This is used with connectors to databases like SQL Server. |
| `objects` | Sequence | Required. Settings specifying tables to replicate and the destination for the replicated tables. |
| `table_configuration` | Map | Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. |

### resources.pipelines.ingestion_definition.table_configuration
Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline.
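
For example, a minimal sketch showing where `table_configuration` sits inside an ingestion pipeline (the resource key, connection name, and values are hypothetical, and required settings such as `objects` are omitted for brevity):

```yaml
resources:
  pipelines:
    my_ingestion_pipeline:                  # hypothetical resource key
      name: my-ingestion-pipeline
      ingestion_definition:
        connection_name: my_connection      # hypothetical Unity Catalog connection
        # objects: [...]                    # required; table replication settings omitted here
        table_configuration:
          primary_keys:
            - id
          salesforce_include_formula_fields: false
```
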
+#### Attributes +| Key | Type | Description | +|-------------------------------------|----------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `primary_keys` | Sequence | The primary key of the table used to apply changes. | +| `salesforce_include_formula_fields` | Boolean | If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector | +| `scd_type` | String | The SCD type to use to ingest the table. | +| `sequence_by` | Sequence | The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. | + +### resources.pipelines.restart_window +Restart window of this pipeline. +#### Attributes +| Key | Type | Description | +|----------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `days_of_week` | String | Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). If not specified all days of the week will be used. | +| `start_hour` | Integer | An integer between 0 and 23 denoting the start hour for the restart window in the 24-hour day. Continuous pipeline restart is triggered only within a five-hour window starting at this hour. | +| `time_zone_id` | String | Time zone id of restart window. See https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details. If not specified, UTC will be used. | + +### resources.pipelines.trigger +Which pipeline trigger to use. Deprecated: Use `continuous` instead. +#### Attributes +| Key | Type | Description | +|----------|------|-------------| +| `cron` | Map | - | +| `manual` | Map | - | + +### resources.pipelines.trigger.cron + +#### Attributes +| Key | Type | Description | +|------------------------|--------|-------------| +| `quartz_cron_schedule` | String | - | +| `timezone_id` | String | - | + +### resources.pipelines.trigger.manual + +### resources.quality_monitors +The quality monitor definitions for the bundle. +| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `assets_dir` | String | The directory to store monitoring assets (e.g. dashboard, metric tables). | +| `baseline_table_name` | String | Name of the baseline table from which drift metrics are computed from. Columns in the monitored table should also be present in the baseline table. | +| `custom_metrics` | Sequence | Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). | +| `data_classification_config` | Map | The data classification config for the monitor. 
| +| `inference_log` | Map | Configuration for monitoring inference logs. | +| `notifications` | Map | The notification settings for the monitor. | +| `output_schema_name` | String | Schema where output metric tables are created. | +| `schedule` | Map | The schedule for automatically updating and refreshing metric tables. | +| `skip_builtin_dashboard` | Boolean | Whether to skip creating a default dashboard summarizing data quality metrics. | +| `slicing_exprs` | Sequence | List of column expressions to slice data with for targeted analysis. The data is grouped by each expression independently, resulting in a separate slice for each predicate and its complements. For high-cardinality columns, only the top 100 unique values by frequency will generate slices. | +| `snapshot` | Map | Configuration for monitoring snapshot tables. | +| `table_name` | String | - | +| `time_series` | Map | Configuration for monitoring time series tables. | +| `warehouse_id` | String | Optional argument to specify the warehouse for dashboard creation. If not specified, the first running warehouse will be used. | + +### resources.quality_monitors.data_classification_config +The data classification config for the monitor. +#### Attributes +| Key | Type | Description | +|-----------|---------|-----------------------------------------| +| `enabled` | Boolean | Whether data classification is enabled. | + +### resources.quality_monitors.inference_log +Configuration for monitoring inference logs. +#### Attributes +| Key | Type | Description | +|------------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `granularities` | Sequence | Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. | +| `label_col` | String | Optional column that contains the ground truth for the prediction. | +| `model_id_col` | String | Column that contains the id of the model generating the predictions. Metrics will be computed per model id by default, and also across all model ids. | +| `prediction_col` | String | Column that contains the output/prediction from the model. | +| `prediction_proba_col` | String | Optional column that contains the prediction probabilities for each class in a classification problem type. The values in this column should be a map, mapping each class label to the prediction probability for a given sample. The map should be of PySpark MapType(). | +| `problem_type` | String | Problem type the model aims to solve. Determines the type of model-quality metrics that will be computed. | +| `timestamp_col` | String | Column that contains the timestamps of requests. The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). | + +### resources.quality_monitors.notifications +The notification settings for the monitor. 
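
For example, a minimal sketch of a quality monitor with notification settings (the monitor key, table, paths, and email address are hypothetical):

```yaml
resources:
  quality_monitors:
    my_monitor:                                     # hypothetical resource key
      table_name: main.default.my_table             # hypothetical monitored table
      assets_dir: /Workspace/Users/someone@example.com/monitoring
      output_schema_name: main.default
      notifications:
        on_failure:
          email_addresses:
            - someone@example.com
```
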
+#### Attributes +| Key | Type | Description | +|--------------------------------------|------|------------------------------------------------------------------------------| +| `on_failure` | Map | Who to send notifications to on monitor failure. | +| `on_new_classification_tag_detected` | Map | Who to send notifications to when new data classification tags are detected. | + +### resources.quality_monitors.notifications.on_failure +Who to send notifications to on monitor failure. +#### Attributes +| Key | Type | Description | +|-------------------|----------|-------------------------------------------------------------------------------------------------------| +| `email_addresses` | Sequence | The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. | + +### resources.quality_monitors.notifications.on_new_classification_tag_detected +Who to send notifications to when new data classification tags are detected. +#### Attributes +| Key | Type | Description | +|-------------------|----------|-------------------------------------------------------------------------------------------------------| +| `email_addresses` | Sequence | The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. | + +### resources.quality_monitors.schedule +The schedule for automatically updating and refreshing metric tables. +#### Attributes +| Key | Type | Description | +|--------------------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `pause_status` | String | Read only field that indicates whether a schedule is paused or not. | +| `quartz_cron_expression` | String | The expression that determines when to run the monitor. See [examples](https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html). | +| `timezone_id` | String | The timezone id (e.g., ``"PST"``) in which to evaluate the quartz expression. | + +### resources.quality_monitors.snapshot +Configuration for monitoring snapshot tables. +### resources.quality_monitors.time_series +Configuration for monitoring time series tables. +#### Attributes +| Key | Type | Description | +|-----------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `granularities` | Sequence | Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. | +| `timestamp_col` | String | Column that contains the timestamps of requests. The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). | + +### resources.registered_models +The registered model definitions for the bundle. 
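
For example, a minimal sketch of a registered model definition (the resource key and names are hypothetical):

```yaml
resources:
  registered_models:
    my_registered_model:        # hypothetical resource key
      catalog_name: main
      schema_name: default
      name: my_model
      comment: Registered model managed by this bundle.
```
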
+| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|--------------------|----------|-----------------------------------------------------------------------------------| +| `catalog_name` | String | The name of the catalog where the schema and the registered model reside | +| `comment` | String | The comment attached to the registered model | +| `grants` | Sequence | - | +| `name` | String | The name of the registered model | +| `schema_name` | String | The name of the schema where the registered model resides | +| `storage_location` | String | The storage location on the cloud under which model version data files are stored | + +### resources.schemas +The schema definitions for the bundle. +| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|----------------|----------|----------------------------------------------------| +| `catalog_name` | String | Name of parent catalog. | +| `comment` | String | User-provided free-form text description. | +| `grants` | Sequence | - | +| `name` | String | Name of schema, relative to parent catalog. | +| `properties` | Map | - | +| `storage_root` | String | Storage root URL for managed tables within schema. | + +### resources.schemas.properties + +### resources.volumes + +| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|--------------------|----------|-------------------------------------------------------------| +| `catalog_name` | String | The name of the catalog where the schema and the volume are | +| `comment` | String | The comment attached to the volume | +| `grants` | Sequence | - | +| `name` | String | The name of the volume | +| `schema_name` | String | The name of the schema where the volume is | +| `storage_location` | String | The storage location on the cloud | +| `volume_type` | String | - | + +## run_as +The identity to use to run the bundle. +#### Attributes +| Key | Type | Description | +|--------------------------|--------|--------------------------------------------------------------------------------------------------------------| +| `service_principal_name` | String | Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. | +| `user_name` | String | The email of an active workspace user. Non-admin users can only set this field to their own email. | + +## sync +The files and file paths to include or exclude in the bundle. See [_](/dev-tools/bundles/) +#### Attributes +| Key | Type | Description | +|-----------|----------|----------------------------------------------------------------------------------------------------------------------------| +| `exclude` | Sequence | A list of files or folders to exclude from the bundle. | +| `include` | Sequence | A list of files or folders to include in the bundle. | +| `paths` | Sequence | The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed. | + +## targets +Defines deployment targets for the bundle. 
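
For example, a minimal sketch of a development and a production target (the target names and workspace URL are hypothetical):

```yaml
targets:
  dev:
    mode: development
    default: true
    workspace:
      host: https://my-workspace.cloud.databricks.com   # hypothetical workspace URL
  prod:
    mode: production
    workspace:
      host: https://my-workspace.cloud.databricks.com
```
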
+| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|---------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------| +| `artifacts` | Map | The artifacts to include in the target deployment. See [_](#artifact) | +| `bundle` | Map | The name of the bundle when deploying to this target. | +| `cluster_id` | String | The ID of the cluster to use for this target. | +| `compute_id` | String | Deprecated. The ID of the compute to use for this target. | +| `default` | Boolean | Whether this target is the default target. | +| `git` | Map | The Git version control settings for the target. See [_](#git). | +| `mode` | String | The deployment mode for the target. Valid values are `development` or `production`. See [_](/dev-tools/bundles/deployment-modes.md). | +| `permissions` | Sequence | The permissions for deploying and running the bundle in the target. See [_](#permission). | +| `presets` | Map | The deployment presets for the target. See [_](#preset). | +| `resources` | Map | The resource definitions for the target. See [_](#resources). | +| `run_as` | Map | The identity to use to run the bundle. See [_](#job_run_as) and [_](/dev-tools/bundles/run_as.md). | +| `sync` | Map | The local paths to sync to the target workspace when a bundle is run or deployed. See [_](#sync). | +| `variables` | Map | The custom variable definitions for the target. See [_](/dev-tools/bundles/settings.md#variables) and [_](/dev-tools/bundles/variables.md). | +| `workspace` | Map | The Databricks workspace for the target. [_](#workspace) | + +### targets.artifacts +The artifacts to include in the target deployment. +| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|--------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `build` | String | An optional set of non-default build commands that you want to run locally before deployment. For Python wheel builds, the Databricks CLI assumes that it can find a local install of the Python wheel package to run builds, and it runs the command python setup.py bdist_wheel by default during each bundle deployment. To specify multiple build commands, separate each command with double-ampersand (&&) characters. | +| `executable` | String | The executable type. | +| `files` | Sequence | The source files for the artifact, defined as an [_](#artifact_file). | +| `path` | String | The location where the built artifact will be saved. | +| `type` | String | The type of the artifact. Valid values are `wheel` or `jar` | + +### targets.bundle +The name of the bundle when deploying to this target. 
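
For example, a minimal sketch that overrides bundle settings for a single target (the target name, bundle name, and branch are hypothetical):

```yaml
targets:
  dev:
    bundle:
      name: my_bundle_dev       # hypothetical bundle name for this target
      git:
        branch: dev             # hypothetical branch
```
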
+#### Attributes +| Key | Type | Description | +|--------------------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `cluster_id` | String | The ID of a cluster to use to run the bundle. See [_](/dev-tools/bundles/settings.md#cluster_id). | +| `compute_id` | String | - | +| `databricks_cli_version` | String | The Databricks CLI version to use for the bundle. See [_](/dev-tools/bundles/settings.md#databricks_cli_version). | +| `deployment` | Map | The definition of the bundle deployment. For supported attributes, see [_](#deployment) and [_](/dev-tools/bundles/deployment-modes.md). | +| `git` | Map | The Git version control details that are associated with your bundle. For supported attributes, see [_](#git) and [_](/dev-tools/bundles/settings.md#git). | +| `name` | String | The name of the bundle. | +| `uuid` | String | - | + +### targets.bundle.deployment +The definition of the bundle deployment +#### Attributes +| Key | Type | Description | +|-----------------------|---------|---------------------------------------------------------------------------------------------------------| +| `fail_on_active_runs` | Boolean | Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. | +| `lock` | Map | The deployment lock attributes. See [_](#lock). | + +### targets.bundle.deployment.lock +The deployment lock attributes. +#### Attributes +| Key | Type | Description | +|-----------|---------|----------------------------------------------| +| `enabled` | Boolean | Whether this lock is enabled. | +| `force` | Boolean | Whether to force this lock if it is enabled. | + +### targets.bundle.git +The Git version control details that are associated with your bundle. +#### Attributes +| Key | Type | Description | +|--------------|--------|--------------------------------------------------------------------------------| +| `branch` | String | The Git branch name. See [_](/dev-tools/bundles/settings.md#git). | +| `origin_url` | String | The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git). | + +### targets.git +The Git version control settings for the target. +#### Attributes +| Key | Type | Description | +|--------------|--------|--------------------------------------------------------------------------------| +| `branch` | String | The Git branch name. See [_](/dev-tools/bundles/settings.md#git). | +| `origin_url` | String | The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git). | + +### targets.presets +The deployment presets for the target. +#### Attributes +| Key | Type | Description | +|----------------------------|---------|-------------------------------------------------------------------------------------------------| +| `jobs_max_concurrent_runs` | Integer | The maximum concurrent runs for a job. | +| `name_prefix` | String | The prefix for job runs of the bundle. | +| `pipelines_development` | Boolean | Whether pipeline deployments should be locked in development mode. | +| `source_linked_deployment` | Boolean | Whether to link the deployment to the bundle source. | +| `tags` | Map | The tags for the bundle deployment. | +| `trigger_pause_status` | String | A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED. | + +### targets.presets.tags +The tags for the bundle deployment. 
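
For example, a minimal sketch of deployment presets, including tags, for a target (the target name, prefix, and tag values are hypothetical):

```yaml
targets:
  dev:
    presets:
      name_prefix: "dev_"            # hypothetical prefix for resource names
      trigger_pause_status: PAUSED   # pause job triggers and schedules in this target
      tags:
        team: data-platform          # hypothetical tag key and value
```
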
+### targets.resources +The resource definitions for the target. +#### Attributes +| Key | Type | Description | +|---------------------------|------|------------------------------------------------------------------------------------------------------------------------| +| `clusters` | Map | The cluster definitions for the bundle. See [_](/dev-tools/bundles/resources.md#cluster) | +| `dashboards` | Map | The dashboard definitions for the bundle. See [_](/dev-tools/bundles/resources.md#dashboard) | +| `experiments` | Map | The experiment definitions for the bundle. See [_](/dev-tools/bundles/resources.md#experiment) | +| `jobs` | Map | The job definitions for the bundle. See [_](/dev-tools/bundles/resources.md#job) | +| `model_serving_endpoints` | Map | The model serving endpoint definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model_serving_endpoint) | +| `models` | Map | The model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model) | +| `pipelines` | Map | The pipeline definitions for the bundle. See [_](/dev-tools/bundles/resources.md#pipeline) | +| `quality_monitors` | Map | The quality monitor definitions for the bundle. See [_](/dev-tools/bundles/resources.md#quality_monitor) | +| `registered_models` | Map | The registered model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#registered_model) | +| `schemas` | Map | The schema definitions for the bundle. See [_](/dev-tools/bundles/resources.md#schema) | +| `volumes` | Map | - | + +### targets.resources.clusters +The cluster definitions for the bundle. +| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|--------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `apply_policy_default_values` | Boolean | When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. | +| `autoscale` | Map | Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. | +| `autotermination_minutes` | Integer | Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. | +| `aws_attributes` | Map | Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. | +| `azure_attributes` | Map | Attributes related to clusters running on Microsoft Azure. 
If not specified at cluster creation, a set of default values will be used. | +| `cluster_log_conf` | Map | The configuration for delivering spark logs to a long-term storage destination. Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. | +| `cluster_name` | String | Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. | +| `custom_tags` | Map | Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags | +| `data_security_mode` | String | - | +| `docker_image` | Map | - | +| `driver_instance_pool_id` | String | The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. | +| `driver_node_type_id` | String | The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. | +| `enable_elastic_disk` | Boolean | Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to the User Guide for more details. | +| `enable_local_disk_encryption` | Boolean | Whether to enable LUKS on cluster VMs' local disks | +| `gcp_attributes` | Map | Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. | +| `init_scripts` | Sequence | The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. | +| `instance_pool_id` | String | The optional ID of the instance pool to which the cluster belongs. | +| `node_type_id` | String | This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. | +| `num_workers` | Integer | Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. 
| +| `permissions` | Sequence | - | +| `policy_id` | String | The ID of the cluster policy used to create the cluster if applicable. | +| `runtime_engine` | String | - | +| `single_user_name` | String | Single user name if data_security_mode is `SINGLE_USER` | +| `spark_conf` | Map | An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. | +| `spark_env_vars` | Map | An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` | +| `spark_version` | String | The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. | +| `ssh_public_keys` | Sequence | SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. | +| `workload_type` | Map | - | + +### targets.resources.clusters.autoscale +Parameters needed in order to automatically scale clusters up and down based on load. +Note: autoscaling works best with DB runtime versions 3.0 or later. +#### Attributes +| Key | Type | Description | +|---------------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `max_workers` | Integer | The maximum number of workers to which the cluster can scale up when overloaded. Note that `max_workers` must be strictly greater than `min_workers`. | +| `min_workers` | Integer | The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. | + +### targets.resources.clusters.aws_attributes +Attributes related to clusters running on Amazon Web Services. +If not specified at cluster creation, a set of default values will be used. 
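
For example, a minimal sketch of a cluster override in a target that sets AWS attributes (the target name, cluster key, and values are hypothetical):

```yaml
targets:
  prod:
    resources:
      clusters:
        my_cluster:                   # hypothetical cluster resource key
          aws_attributes:
            first_on_demand: 1        # keep the driver on an on-demand instance
            zone_id: auto             # let Databricks pick an availability zone
            spot_bid_price_percent: 100
```
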
+
#### Attributes
| Key | Type | Description |
|--------------------------|---------|-------------------------------------------------------------------------------------------------------------------------------|
| `availability` | String | - |
| `ebs_volume_count` | Integer | The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogeneously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. |
| `ebs_volume_iops` | Integer | If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. |
| `ebs_volume_size` | Integer | The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. |
| `ebs_volume_throughput` | Integer | If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. |
| `ebs_volume_type` | String | - |
| `first_on_demand` | Integer | The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. |
| `instance_profile_arn` | String | Nodes for this cluster will only be placed on AWS instances with this instance profile. If omitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans. If this field is omitted, we will pull in the default from the conf if it exists. |
| `spot_bid_price_percent` | Integer | The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. The default value and documentation here should be kept consistent with CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. |
| `zone_id` | String | Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", Databricks will try to place the cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. |

### targets.resources.clusters.azure_attributes
Attributes related to clusters running on Microsoft Azure.
If not specified at cluster creation, a set of default values will be used.
#### Attributes
| Key | Type | Description |
|----------------------|---------|---------------------------------------------------------------------------------------------------------------------------------|
| `availability` | String | - |
| `first_on_demand` | Integer | The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. |
| `log_analytics_info` | Map | Defines values necessary to configure and run the Azure Log Analytics agent. |
| `spot_bid_max_price` | Any | The max bid price to be used for Azure spot instances. The max price for the bid cannot be higher than the on-demand price of the instance. 
If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should be greater than 0, or -1. |
+
+### targets.resources.clusters.azure_attributes.log_analytics_info
+Defines values necessary to configure and run the Azure Log Analytics agent.
+#### Attributes
+| Key                          | Type   | Description |
+|------------------------------|--------|-------------|
+| `log_analytics_primary_key`  | String | -           |
+| `log_analytics_workspace_id` | String | -           |
+
+### targets.resources.clusters.cluster_log_conf
+The configuration for delivering Spark logs to a long-term storage destination.
+Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified
+for one cluster. If the conf is given, the logs will be delivered to the destination every
+`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while
+the destination of executor logs is `$destination/$clusterId/executor`.
+#### Attributes
+| Key    | Type | Description |
+|--------|------|-------------|
+| `dbfs` | Map  | destination needs to be provided, e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` |
+| `s3`   | Map  | destination and either the region or endpoint need to be provided, e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` The cluster IAM role is used to access S3; make sure the cluster IAM role in `instance_profile_arn` has permission to write data to the S3 destination. |
+
+### targets.resources.clusters.cluster_log_conf.dbfs
+destination needs to be provided, e.g.
+`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`
+#### Attributes
+| Key           | Type   | Description                            |
+|---------------|--------|----------------------------------------|
+| `destination` | String | DBFS destination, e.g. `dbfs:/my/path` |
+
+### targets.resources.clusters.cluster_log_conf.s3
+destination and either the region or endpoint need to be provided, e.g.
+`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }`
+The cluster IAM role is used to access S3; make sure the cluster IAM role in
+`instance_profile_arn` has permission to write data to the S3 destination.
+#### Attributes
+| Key                 | Type    | Description |
+|---------------------|---------|-------------|
+| `canned_acl`        | String  | (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_acl` is set, please make sure the cluster IAM role has `s3:PutObjectAcl` permission on the destination bucket and prefix. 
The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. | +| `destination` | String | S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. | +| `enable_encryption` | Boolean | (Optional) Flag to enable server side encryption, `false` by default. | +| `encryption_type` | String | (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. | +| `endpoint` | String | S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. | +| `kms_key` | String | (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. | +| `region` | String | S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. | + +### targets.resources.clusters.custom_tags +Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS +instances and EBS volumes) with these tags in addition to `default_tags`. Notes: + +- Currently, Databricks allows at most 45 custom tags + +- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags +### targets.resources.clusters.docker_image + +#### Attributes +| Key | Type | Description | +|--------------|--------|--------------------------| +| `basic_auth` | Map | - | +| `url` | String | URL of the docker image. | + +### targets.resources.clusters.docker_image.basic_auth + +#### Attributes +| Key | Type | Description | +|------------|--------|----------------------| +| `password` | String | Password of the user | +| `username` | String | Name of the user | + +### targets.resources.clusters.gcp_attributes +Attributes related to clusters running on Google Cloud Platform. +If not specified at cluster creation, a set of default values will be used. +#### Attributes +| Key | Type | Description | +|-----------------------------|---------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `availability` | String | - | +| `boot_disk_size` | Integer | boot disk size in GB | +| `google_service_account` | String | If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). The google service account must have previously been added to the Databricks environment by an account administrator. | +| `local_ssd_count` | Integer | If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. 
Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. |
+| `use_preemptible_executors` | Boolean | This field determines whether the Spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard Compute Engine VMs (when set to false; default). Note: Soon to be deprecated, use the availability field instead. |
+| `zone_id` | String | Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default] - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => pick one of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. |
+
+### targets.resources.clusters.spark_conf
+An object containing a set of optional, user-specified Spark configuration key-value pairs.
+Users can also pass in a string of extra JVM options to the driver and the executors via
+`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.
+
+### targets.resources.clusters.spark_env_vars
+An object containing a set of optional, user-specified environment variable key-value pairs.
+Please note that a key-value pair of the form (X,Y) will be exported as is (i.e.,
+`export X='Y'`) while launching the driver and workers.
+
+In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending
+them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all
+default Databricks-managed environment variables are included as well.
+
+Example Spark environment variables:
+`{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or
+`{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}`
+
+### targets.resources.clusters.workload_type
+
+#### Attributes
+| Key       | Type | Description                                                                |
+|-----------|------|----------------------------------------------------------------------------|
+| `clients` | Map  | Defines what type of clients can use the cluster, e.g. notebooks and jobs. |
+
+### targets.resources.clusters.workload_type.clients
+Defines what type of clients can use the cluster, e.g. notebooks and jobs.
+#### Attributes
+| Key         | Type    | Description                                                |
+|-------------|---------|------------------------------------------------------------|
+| `jobs`      | Boolean | With jobs set, the cluster can be used for jobs            |
+| `notebooks` | Boolean | With notebooks set, this cluster can be used for notebooks |
+
+### targets.resources.dashboards
+The dashboard definitions for the bundle.
+| Key | Type | Description |
+|----------|------|----------------------------|
+| `` | Map | The definition of the item |
+
+Each item has the following attributes:
+| Key | Type | Description |
+|------------------------|----------|-------------|
+| `create_time` | String | The timestamp of when the dashboard was created. |
+| `dashboard_id` | String | UUID identifying the dashboard. 
| +| `display_name` | String | The display name of the dashboard. | +| `embed_credentials` | Boolean | - | +| `etag` | String | The etag for the dashboard. Can be optionally provided on updates to ensure that the dashboard has not been modified since the last read. This field is excluded in List Dashboards responses. | +| `file_path` | String | - | +| `lifecycle_state` | String | The state of the dashboard resource. Used for tracking trashed status. | +| `parent_path` | String | The workspace path of the folder containing the dashboard. Includes leading slash and no trailing slash. This field is excluded in List Dashboards responses. | +| `path` | String | The workspace path of the dashboard asset, including the file name. Exported dashboards always have the file extension `.lvdash.json`. This field is excluded in List Dashboards responses. | +| `permissions` | Sequence | - | +| `serialized_dashboard` | Any | The contents of the dashboard in serialized string form. This field is excluded in List Dashboards responses. Use the [get dashboard API](https://docs.databricks.com/api/workspace/lakeview/get) to retrieve an example response, which includes the `serialized_dashboard` field. This field provides the structure of the JSON string that represents the dashboard's layout and components. | +| `update_time` | String | The timestamp of when the dashboard was last updated by the user. This field is excluded in List Dashboards responses. | +| `warehouse_id` | String | The warehouse ID used to run the dashboard. | + +### targets.resources.experiments +The experiment definitions for the bundle. +| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|---------------------|----------|------------------------------------------------------------------------------------------------------------------| +| `artifact_location` | String | Location where artifacts for the experiment are stored. | +| `creation_time` | Integer | Creation time | +| `experiment_id` | String | Unique identifier for the experiment. | +| `last_update_time` | Integer | Last update time | +| `lifecycle_stage` | String | Current life cycle stage of the experiment: "active" or "deleted". Deleted experiments are not returned by APIs. | +| `name` | String | Human readable name that identifies the experiment. | +| `permissions` | Sequence | - | +| `tags` | Sequence | Tags: Additional metadata key-value pairs. | + +### targets.resources.jobs +The job definitions for the bundle. 
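+
+For illustration, a minimal job definition might look roughly like the following sketch (the `dev` target, the `my_job` key, the task block, and all values are placeholders; task attributes are not documented in this section):
+
+```yaml
+targets:
+  dev:
+    resources:
+      jobs:
+        my_job:                        # hypothetical job key
+          name: my-job
+          max_concurrent_runs: 1
+          tasks:                       # task fields below are illustrative only
+            - task_key: main
+              notebook_task:
+                notebook_path: ./src/main_notebook.py
+          schedule:
+            quartz_cron_expression: "0 0 6 * * ?"
+            timezone_id: UTC
+            pause_status: UNPAUSED
+          email_notifications:
+            on_failure:
+              - alerts@example.com     # placeholder address
+```
+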
+| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|-------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `budget_policy_id` | String | The id of the user specified budget policy to use for this job. If not specified, a default budget policy may be applied when creating or modifying the job. See `effective_budget_policy_id` for the budget policy used by this workload. | +| `continuous` | Map | An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. | +| `description` | String | An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding. | +| `email_notifications` | Map | An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. | +| `environments` | Sequence | A list of task execution environment specifications that can be referenced by serverless tasks of this job. An environment is required to be present for serverless tasks. For serverless notebook tasks, the environment is accessible in the notebook environment panel. For other serverless tasks, the task environment is required to be specified using environment_key in the task settings. | +| `git_source` | Map | An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. | +| `health` | Map | - | +| `job_clusters` | Sequence | A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. | +| `max_concurrent_runs` | Integer | An optional maximum allowed number of concurrent runs of the job. Set this value if you want to be able to execute multiple runs of the same job concurrently. This is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters. This setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. 
Then setting the concurrency to 3 won’t kill any of the active runs. However, from then on, new runs are skipped unless there are fewer than 3 active runs. This value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped. | +| `name` | String | An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding. | +| `notification_settings` | Map | Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. | +| `parameters` | Sequence | Job-level parameter definitions | +| `permissions` | Sequence | - | +| `queue` | Map | The queue settings of the job. | +| `run_as` | Map | - | +| `schedule` | Map | An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. | +| `tags` | Map | A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. | +| `tasks` | Sequence | A list of task specifications to be executed by this job. | +| `timeout_seconds` | Integer | An optional timeout applied to each run of this job. A value of `0` means no timeout. | +| `trigger` | Map | A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. | +| `webhook_notifications` | Map | A collection of system notification IDs to notify when runs of this job begin or complete. | + +### targets.resources.jobs.continuous +An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. +#### Attributes +| Key | Type | Description | +|----------------|--------|----------------------------------------------------------------------------------------------| +| `pause_status` | String | Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED. | + +### targets.resources.jobs.email_notifications +An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. +#### Attributes +| Key | Type | Description | +|------------------------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `no_alert_for_skipped_runs` | Boolean | If true, do not send email to recipients specified in `on_failure` if the run is skipped. This field is `deprecated`. Please use the `notification_settings.no_alert_for_skipped_runs` field. | +| `on_duration_warning_threshold_exceeded` | Sequence | A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. 
| +| `on_failure` | Sequence | A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. | +| `on_start` | Sequence | A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. | +| `on_streaming_backlog_exceeded` | Sequence | A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. | +| `on_success` | Sequence | A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. | + +### targets.resources.jobs.git_source +An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. + +If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. + +Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. +#### Attributes +| Key | Type | Description | +|----------------|--------|--------------------------------------------------------------------------------------------------------------------------------------| +| `git_branch` | String | Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit. | +| `git_commit` | String | Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag. | +| `git_provider` | String | Unique identifier of the service used to host the Git repository. The value is case insensitive. | +| `git_tag` | String | Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit. | +| `git_url` | String | URL of the repository to be cloned by this job. | + +### targets.resources.jobs.health + +#### Attributes +| Key | Type | Description | +|---------|----------|-------------| +| `rules` | Sequence | - | + +### targets.resources.jobs.notification_settings +Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. +#### Attributes +| Key | Type | Description | +|------------------------------|---------|----------------------------------------------------------------------------------------------------| +| `no_alert_for_canceled_runs` | Boolean | If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. 
| +| `no_alert_for_skipped_runs` | Boolean | If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. | + +### targets.resources.jobs.queue +The queue settings of the job. +#### Attributes +| Key | Type | Description | +|-----------|---------|-----------------------------------------------------------------| +| `enabled` | Boolean | If true, enable queueing for the job. This is a required field. | + +### targets.resources.jobs.run_as + +#### Attributes +| Key | Type | Description | +|--------------------------|--------|--------------------------------------------------------------------------------------------------------------| +| `service_principal_name` | String | Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. | +| `user_name` | String | The email of an active workspace user. Non-admin users can only set this field to their own email. | + +### targets.resources.jobs.schedule +An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. +#### Attributes +| Key | Type | Description | +|--------------------------|--------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `pause_status` | String | Indicate whether this schedule is paused or not. | +| `quartz_cron_expression` | String | A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required. | +| `timezone_id` | String | A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required. | + +### targets.resources.jobs.tags +A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. +### targets.resources.jobs.trigger +A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. +#### Attributes +| Key | Type | Description | +|----------------|--------|-------------------------------------------------------------------------| +| `file_arrival` | Map | File arrival trigger settings. | +| `pause_status` | String | Whether this trigger is paused or not. | +| `periodic` | Map | Periodic trigger settings. | +| `table` | Map | Old table trigger settings name. Deprecated in favor of `table_update`. | +| `table_update` | Map | - | + +### targets.resources.jobs.trigger.file_arrival +File arrival trigger settings. 
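+
+As a sketch, a file arrival trigger might be configured on a job like this (the URL is a placeholder for an external location path):
+
+```yaml
+trigger:
+  pause_status: UNPAUSED
+  file_arrival:
+    url: s3://my-bucket/landing/       # placeholder external location path
+    min_time_between_triggers_seconds: 60
+    wait_after_last_change_seconds: 120
+```
+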
+#### Attributes +| Key | Type | Description | +|-------------------------------------|---------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `min_time_between_triggers_seconds` | Integer | If set, the trigger starts a run only after the specified amount of time passed since the last time the trigger fired. The minimum allowed value is 60 seconds | +| `url` | String | URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location. | +| `wait_after_last_change_seconds` | Integer | If set, the trigger starts a run only after no file activity has occurred for the specified amount of time. This makes it possible to wait for a batch of incoming files to arrive before triggering a run. The minimum allowed value is 60 seconds. | + +### targets.resources.jobs.trigger.periodic +Periodic trigger settings. +#### Attributes +| Key | Type | Description | +|------------|---------|-----------------------------------------------| +| `interval` | Integer | The interval at which the trigger should run. | +| `unit` | String | The unit of time for the interval. | + +### targets.resources.jobs.trigger.table +Old table trigger settings name. Deprecated in favor of `table_update`. +#### Attributes +| Key | Type | Description | +|-------------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `condition` | String | The table(s) condition based on which to trigger a job run. | +| `min_time_between_triggers_seconds` | Integer | If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. | +| `table_names` | Sequence | A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. | +| `wait_after_last_change_seconds` | Integer | If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. The minimum allowed value is 60 seconds. | + +### targets.resources.jobs.trigger.table_update + +#### Attributes +| Key | Type | Description | +|-------------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `condition` | String | The table(s) condition based on which to trigger a job run. | +| `min_time_between_triggers_seconds` | Integer | If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. | +| `table_names` | Sequence | A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. | +| `wait_after_last_change_seconds` | Integer | If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. 
The minimum allowed value is 60 seconds. | + +### targets.resources.jobs.webhook_notifications +A collection of system notification IDs to notify when runs of this job begin or complete. +#### Attributes +| Key | Type | Description | +|------------------------------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `on_duration_warning_threshold_exceeded` | Sequence | An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. | +| `on_failure` | Sequence | An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. | +| `on_start` | Sequence | An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. | +| `on_streaming_backlog_exceeded` | Sequence | An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. | +| `on_success` | Sequence | An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. | + +### targets.resources.model_serving_endpoints +The model serving endpoint definitions for the bundle. +| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|-------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `ai_gateway` | Map | The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are supported as of now. | +| `config` | Map | The core config of the serving endpoint. | +| `name` | String | The name of the serving endpoint. This field is required and must be unique across a Databricks workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. | +| `permissions` | Sequence | - | +| `rate_limits` | Sequence | Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI Gateway to manage rate limits. | +| `route_optimized` | Boolean | Enable route optimization for the serving endpoint. 
| +| `tags` | Sequence | Tags to be attached to the serving endpoint and automatically propagated to billing logs. | + +### targets.resources.model_serving_endpoints.ai_gateway +The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are supported as of now. +#### Attributes +| Key | Type | Description | +|--------------------------|----------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `guardrails` | Map | Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. | +| `inference_table_config` | Map | Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. | +| `rate_limits` | Sequence | Configuration for rate limits which can be set to limit endpoint traffic. | +| `usage_tracking_config` | Map | Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. | + +### targets.resources.model_serving_endpoints.ai_gateway.guardrails +Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. +#### Attributes +| Key | Type | Description | +|----------|------|---------------------------------------------| +| `input` | Map | Configuration for input guardrail filters. | +| `output` | Map | Configuration for output guardrail filters. | + +### targets.resources.model_serving_endpoints.ai_gateway.guardrails.input +Configuration for input guardrail filters. +#### Attributes +| Key | Type | Description | +|--------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------| +| `invalid_keywords` | Sequence | List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. | +| `pii` | Map | Configuration for guardrail PII filter. | +| `safety` | Boolean | Indicates whether the safety filter is enabled. | +| `valid_topics` | Sequence | The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. | + +### targets.resources.model_serving_endpoints.ai_gateway.guardrails.input.pii +Configuration for guardrail PII filter. +#### Attributes +| Key | Type | Description | +|------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `behavior` | String | Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. | + +### targets.resources.model_serving_endpoints.ai_gateway.guardrails.output +Configuration for output guardrail filters. 
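+
+For example, output guardrails might be configured as in the following sketch (keywords and topics are placeholders):
+
+```yaml
+ai_gateway:
+  guardrails:
+    output:
+      invalid_keywords:
+        - internal-project-name        # placeholder keyword
+      pii:
+        behavior: BLOCK
+      safety: true
+      valid_topics:
+        - billing
+        - support
+```
+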
+#### Attributes +| Key | Type | Description | +|--------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------| +| `invalid_keywords` | Sequence | List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. | +| `pii` | Map | Configuration for guardrail PII filter. | +| `safety` | Boolean | Indicates whether the safety filter is enabled. | +| `valid_topics` | Sequence | The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. | + +### targets.resources.model_serving_endpoints.ai_gateway.guardrails.output.pii +Configuration for guardrail PII filter. +#### Attributes +| Key | Type | Description | +|------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `behavior` | String | Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. | + +### targets.resources.model_serving_endpoints.ai_gateway.inference_table_config +Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. +#### Attributes +| Key | Type | Description | +|---------------------|---------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `catalog_name` | String | The name of the catalog in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the catalog name. | +| `enabled` | Boolean | Indicates whether the inference table is enabled. | +| `schema_name` | String | The name of the schema in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the schema name. | +| `table_name_prefix` | String | The prefix of the table in Unity Catalog. NOTE: On update, you have to disable inference table first in order to change the prefix name. | + +### targets.resources.model_serving_endpoints.ai_gateway.usage_tracking_config +Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. +#### Attributes +| Key | Type | Description | +|-----------|---------|-----------------------------------| +| `enabled` | Boolean | Whether to enable usage tracking. | + +### targets.resources.model_serving_endpoints.config +The core config of the serving endpoint. 
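+
+A minimal sketch of an endpoint `config` block might look like this (the endpoint key and all values are placeholders; the fields inside `served_entities` come from the serving API and are not listed in the tables in this section):
+
+```yaml
+model_serving_endpoints:
+  my_endpoint:                         # hypothetical endpoint key
+    name: my-endpoint
+    config:
+      served_entities:                 # served-entity fields below are illustrative only
+        - entity_name: main.default.my_model
+          entity_version: "1"
+          workload_size: Small
+          scale_to_zero_enabled: true
+      auto_capture_config:
+        enabled: true
+        catalog_name: main
+        schema_name: serving_logs
+```
+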
+#### Attributes +| Key | Type | Description | +|-----------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------| +| `auto_capture_config` | Map | Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. | +| `served_entities` | Sequence | A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served entities. | +| `served_models` | Sequence | (Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A serving endpoint can have up to 15 served models. | +| `traffic_config` | Map | The traffic config defining how invocations to the serving endpoint should be routed. | + +### targets.resources.model_serving_endpoints.config.auto_capture_config +Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. +#### Attributes +| Key | Type | Description | +|---------------------|---------|------------------------------------------------------------------------------------------------------------------------------------------| +| `catalog_name` | String | The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled. | +| `enabled` | Boolean | Indicates whether the inference table is enabled. | +| `schema_name` | String | The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if the inference table is already enabled. | +| `table_name_prefix` | String | The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled. | + +### targets.resources.model_serving_endpoints.config.traffic_config +The traffic config defining how invocations to the serving endpoint should be routed. +#### Attributes +| Key | Type | Description | +|----------|----------|---------------------------------------------------------------| +| `routes` | Sequence | The list of routes that define traffic to each served entity. | + +### targets.resources.models +The model definitions for the bundle. +| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|--------------------------|----------|-------------------------------------------------------------------------------------------------------| +| `creation_timestamp` | Integer | Timestamp recorded when this `registered_model` was created. | +| `description` | String | Description of this `registered_model`. | +| `last_updated_timestamp` | Integer | Timestamp recorded when metadata for this `registered_model` was last updated. | +| `latest_versions` | Sequence | Collection of latest model versions for each stage. Only contains models with current `READY` status. | +| `name` | String | Unique name for the model. | +| `permissions` | Sequence | - | +| `tags` | Sequence | Tags: Additional metadata key-value pairs for this `registered_model`. | +| `user_id` | String | User that created this `registered_model` | + +### targets.resources.pipelines +The pipeline definitions for the bundle. 
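+
+For illustration, a minimal pipeline definition might look roughly like the following sketch (the `dev` target, the `my_pipeline` key, the library entry, and all values are placeholders; library item fields are not documented in this section):
+
+```yaml
+targets:
+  dev:
+    resources:
+      pipelines:
+        my_pipeline:                   # hypothetical pipeline key
+          name: my-pipeline
+          catalog: main
+          target: dev_schema
+          development: true
+          continuous: false
+          libraries:
+            - notebook:
+                path: ./pipelines/transform_notebook.py   # illustrative library entry
+          configuration:
+            my_property: my_value      # arbitrary string-string configuration
+```
+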
+| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `budget_policy_id` | String | Budget policy of this pipeline. | +| `catalog` | String | A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. | +| `channel` | String | DLT Release Channel that specifies which version to use. | +| `clusters` | Sequence | Cluster settings for this pipeline deployment. | +| `configuration` | Map | String-String configuration for this pipeline execution. | +| `continuous` | Boolean | Whether the pipeline is continuous or triggered. This replaces `trigger`. | +| `deployment` | Map | Deployment type of this pipeline. | +| `development` | Boolean | Whether the pipeline is in Development mode. Defaults to false. | +| `edition` | String | Pipeline product edition. | +| `filters` | Map | Filters on which Pipeline packages to include in the deployed graph. | +| `gateway_definition` | Map | The definition of a gateway pipeline to support change data capture. | +| `id` | String | Unique identifier for this pipeline. | +| `ingestion_definition` | Map | The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings. | +| `libraries` | Sequence | Libraries or code needed by this deployment. | +| `name` | String | Friendly identifier for this pipeline. | +| `notifications` | Sequence | List of notification settings for this pipeline. | +| `permissions` | Sequence | - | +| `photon` | Boolean | Whether Photon is enabled for this pipeline. | +| `restart_window` | Map | Restart window of this pipeline. | +| `schema` | String | The default schema (database) where tables are read from or published to. The presence of this field implies that the pipeline is in direct publishing mode. | +| `serverless` | Boolean | Whether serverless compute is enabled for this pipeline. | +| `storage` | String | DBFS root directory for storing checkpoints and tables. | +| `target` | String | Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. | +| `trigger` | Map | Which pipeline trigger to use. Deprecated: Use `continuous` instead. | + +### targets.resources.pipelines.configuration +String-String configuration for this pipeline execution. +### targets.resources.pipelines.deployment +Deployment type of this pipeline. +#### Attributes +| Key | Type | Description | +|----------------------|--------|----------------------------------------------------------------| +| `kind` | String | The deployment method that manages the pipeline. | +| `metadata_file_path` | String | The path to the file containing metadata about the deployment. | + +### targets.resources.pipelines.filters +Filters on which Pipeline packages to include in the deployed graph. 
+#### Attributes +| Key | Type | Description | +|-----------|----------|-------------------| +| `exclude` | Sequence | Paths to exclude. | +| `include` | Sequence | Paths to include. | + +### targets.resources.pipelines.gateway_definition +The definition of a gateway pipeline to support change data capture. +#### Attributes +| Key | Type | Description | +|---------------------------|--------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `connection_id` | String | [Deprecated, use connection_name instead] Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. | +| `connection_name` | String | Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. | +| `gateway_storage_catalog` | String | Required, Immutable. The name of the catalog for the gateway pipeline's storage location. | +| `gateway_storage_name` | String | Optional. The Unity Catalog-compatible name for the gateway storage location. This is the destination to use for the data that is extracted by the gateway. Delta Live Tables system will automatically create the storage location under the catalog and schema. | +| `gateway_storage_schema` | String | Required, Immutable. The name of the schema for the gateway pipelines's storage location. | + +### targets.resources.pipelines.ingestion_definition +The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings. +#### Attributes +| Key | Type | Description | +|------------------------|----------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `connection_name` | String | Immutable. The Unity Catalog connection that this ingestion pipeline uses to communicate with the source. This is used with connectors for applications like Salesforce, Workday, and so on. | +| `ingestion_gateway_id` | String | Immutable. Identifier for the gateway that is used by this ingestion pipeline to communicate with the source database. This is used with connectors to databases like SQL Server. | +| `objects` | Sequence | Required. Settings specifying tables to replicate and the destination for the replicated tables. | +| `table_configuration` | Map | Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. | + +### targets.resources.pipelines.ingestion_definition.table_configuration +Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. +#### Attributes +| Key | Type | Description | +|-------------------------------------|----------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `primary_keys` | Sequence | The primary key of the table used to apply changes. | +| `salesforce_include_formula_fields` | Boolean | If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector | +| `scd_type` | String | The SCD type to use to ingest the table. 
| +| `sequence_by` | Sequence | The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. | + +### targets.resources.pipelines.restart_window +Restart window of this pipeline. +#### Attributes +| Key | Type | Description | +|----------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `days_of_week` | String | Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). If not specified all days of the week will be used. | +| `start_hour` | Integer | An integer between 0 and 23 denoting the start hour for the restart window in the 24-hour day. Continuous pipeline restart is triggered only within a five-hour window starting at this hour. | +| `time_zone_id` | String | Time zone id of restart window. See https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details. If not specified, UTC will be used. | + +### targets.resources.pipelines.trigger +Which pipeline trigger to use. Deprecated: Use `continuous` instead. +#### Attributes +| Key | Type | Description | +|----------|------|-------------| +| `cron` | Map | - | +| `manual` | Map | - | + +### targets.resources.pipelines.trigger.cron + +#### Attributes +| Key | Type | Description | +|------------------------|--------|-------------| +| `quartz_cron_schedule` | String | - | +| `timezone_id` | String | - | + +### targets.resources.pipelines.trigger.manual + +### targets.resources.quality_monitors +The quality monitor definitions for the bundle. +| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `assets_dir` | String | The directory to store monitoring assets (e.g. dashboard, metric tables). | +| `baseline_table_name` | String | Name of the baseline table from which drift metrics are computed from. Columns in the monitored table should also be present in the baseline table. | +| `custom_metrics` | Sequence | Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). | +| `data_classification_config` | Map | The data classification config for the monitor. | +| `inference_log` | Map | Configuration for monitoring inference logs. | +| `notifications` | Map | The notification settings for the monitor. | +| `output_schema_name` | String | Schema where output metric tables are created. | +| `schedule` | Map | The schedule for automatically updating and refreshing metric tables. | +| `skip_builtin_dashboard` | Boolean | Whether to skip creating a default dashboard summarizing data quality metrics. | +| `slicing_exprs` | Sequence | List of column expressions to slice data with for targeted analysis. 
The data is grouped by each expression independently, resulting in a separate slice for each predicate and its complements. For high-cardinality columns, only the top 100 unique values by frequency will generate slices. | +| `snapshot` | Map | Configuration for monitoring snapshot tables. | +| `table_name` | String | - | +| `time_series` | Map | Configuration for monitoring time series tables. | +| `warehouse_id` | String | Optional argument to specify the warehouse for dashboard creation. If not specified, the first running warehouse will be used. | + +### targets.resources.quality_monitors.data_classification_config +The data classification config for the monitor. +#### Attributes +| Key | Type | Description | +|-----------|---------|-----------------------------------------| +| `enabled` | Boolean | Whether data classification is enabled. | + +### targets.resources.quality_monitors.inference_log +Configuration for monitoring inference logs. +#### Attributes +| Key | Type | Description | +|------------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `granularities` | Sequence | Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. | +| `label_col` | String | Optional column that contains the ground truth for the prediction. | +| `model_id_col` | String | Column that contains the id of the model generating the predictions. Metrics will be computed per model id by default, and also across all model ids. | +| `prediction_col` | String | Column that contains the output/prediction from the model. | +| `prediction_proba_col` | String | Optional column that contains the prediction probabilities for each class in a classification problem type. The values in this column should be a map, mapping each class label to the prediction probability for a given sample. The map should be of PySpark MapType(). | +| `problem_type` | String | Problem type the model aims to solve. Determines the type of model-quality metrics that will be computed. | +| `timestamp_col` | String | Column that contains the timestamps of requests. The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). | + +### targets.resources.quality_monitors.notifications +The notification settings for the monitor. +#### Attributes +| Key | Type | Description | +|--------------------------------------|------|------------------------------------------------------------------------------| +| `on_failure` | Map | Who to send notifications to on monitor failure. | +| `on_new_classification_tag_detected` | Map | Who to send notifications to when new data classification tags are detected. | + +### targets.resources.quality_monitors.notifications.on_failure +Who to send notifications to on monitor failure. 
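For illustration, a sketch of how these failure notifications might be expressed in a bundle configuration; the target, monitor name, and email address below are hypothetical, and other required monitor attributes are omitted for brevity:

```yaml
targets:
  dev:
    resources:
      quality_monitors:
        my_monitor:
          notifications:
            on_failure:
              email_addresses:
                - ops@example.com
```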
+#### Attributes +| Key | Type | Description | +|-------------------|----------|-------------------------------------------------------------------------------------------------------| +| `email_addresses` | Sequence | The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. | + +### targets.resources.quality_monitors.notifications.on_new_classification_tag_detected +Who to send notifications to when new data classification tags are detected. +#### Attributes +| Key | Type | Description | +|-------------------|----------|-------------------------------------------------------------------------------------------------------| +| `email_addresses` | Sequence | The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. | + +### targets.resources.quality_monitors.schedule +The schedule for automatically updating and refreshing metric tables. +#### Attributes +| Key | Type | Description | +|--------------------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `pause_status` | String | Read only field that indicates whether a schedule is paused or not. | +| `quartz_cron_expression` | String | The expression that determines when to run the monitor. See [examples](https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html). | +| `timezone_id` | String | The timezone id (e.g., ``"PST"``) in which to evaluate the quartz expression. | + +### targets.resources.quality_monitors.snapshot +Configuration for monitoring snapshot tables. +### targets.resources.quality_monitors.time_series +Configuration for monitoring time series tables. +#### Attributes +| Key | Type | Description | +|-----------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `granularities` | Sequence | Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. | +| `timestamp_col` | String | Column that contains the timestamps of requests. The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). | + +### targets.resources.registered_models +The registered model definitions for the bundle. 
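For illustration, a minimal sketch of a registered model definition using only the attributes documented in the table that follows; the target and model names below are hypothetical:

```yaml
targets:
  dev:
    resources:
      registered_models:
        my_registered_model:
          name: my_registered_model
          catalog_name: main
          schema_name: default
          comment: Registered model managed by this bundle
```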
+| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|--------------------|----------|-----------------------------------------------------------------------------------| +| `catalog_name` | String | The name of the catalog where the schema and the registered model reside | +| `comment` | String | The comment attached to the registered model | +| `grants` | Sequence | - | +| `name` | String | The name of the registered model | +| `schema_name` | String | The name of the schema where the registered model resides | +| `storage_location` | String | The storage location on the cloud under which model version data files are stored | + +### targets.resources.schemas +The schema definitions for the bundle. +| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|----------------|----------|----------------------------------------------------| +| `catalog_name` | String | Name of parent catalog. | +| `comment` | String | User-provided free-form text description. | +| `grants` | Sequence | - | +| `name` | String | Name of schema, relative to parent catalog. | +| `properties` | Map | - | +| `storage_root` | String | Storage root URL for managed tables within schema. | + +### targets.resources.schemas.properties + +### targets.resources.volumes + +| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|--------------------|----------|-------------------------------------------------------------| +| `catalog_name` | String | The name of the catalog where the schema and the volume are | +| `comment` | String | The comment attached to the volume | +| `grants` | Sequence | - | +| `name` | String | The name of the volume | +| `schema_name` | String | The name of the schema where the volume is | +| `storage_location` | String | The storage location on the cloud | +| `volume_type` | String | - | + +### targets.run_as +The identity to use to run the bundle. +#### Attributes +| Key | Type | Description | +|--------------------------|--------|--------------------------------------------------------------------------------------------------------------| +| `service_principal_name` | String | Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. | +| `user_name` | String | The email of an active workspace user. Non-admin users can only set this field to their own email. | + +### targets.sync +The local paths to sync to the target workspace when a bundle is run or deployed. +#### Attributes +| Key | Type | Description | +|-----------|----------|----------------------------------------------------------------------------------------------------------------------------| +| `exclude` | Sequence | A list of files or folders to exclude from the bundle. | +| `include` | Sequence | A list of files or folders to include in the bundle. | +| `paths` | Sequence | The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed. | + +### targets.variables +The custom variable definitions for the target. 
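For illustration, a sketch of a per-target variable definition using the attributes documented below; the target name, variable name, and default value are hypothetical:

```yaml
targets:
  dev:
    variables:
      catalog:
        description: The catalog to use in this target.
        default: dev_catalog
```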
+| Key | Type | Description | +|----------|------|----------------------------| +| `` | Map | The definition of the item | + +Each item has the following attributes: +| Key | Type | Description | +|---------------|--------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `default` | Any | - | +| `description` | String | The description of the variable. | +| `lookup` | Map | The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID. | +| `type` | String | The type of the variable. | + +### targets.variables.lookup +The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID. +#### Attributes +| Key | Type | Description | +|----------------------------|--------|-------------| +| `alert` | String | - | +| `cluster` | String | - | +| `cluster_policy` | String | - | +| `dashboard` | String | - | +| `instance_pool` | String | - | +| `job` | String | - | +| `metastore` | String | - | +| `notification_destination` | String | - | +| `pipeline` | String | - | +| `query` | String | - | +| `service_principal` | String | - | +| `warehouse` | String | - | + +### targets.workspace +The Databricks workspace for the target. +#### Attributes +| Key | Type | Description | +|-------------------------------|---------|--------------------------------------------------------------------------------------| +| `artifact_path` | String | The artifact path to use within the workspace for both deployments and workflow runs | +| `auth_type` | String | The authentication type. | +| `azure_client_id` | String | The Azure client ID | +| `azure_environment` | String | The Azure environment | +| `azure_login_app_id` | String | The Azure login app ID | +| `azure_tenant_id` | String | The Azure tenant ID | +| `azure_use_msi` | Boolean | Whether to use MSI for Azure | +| `azure_workspace_resource_id` | String | The Azure workspace resource ID | +| `client_id` | String | The client ID for the workspace | +| `file_path` | String | The file path to use within the workspace for both deployments and workflow runs | +| `google_service_account` | String | The Google service account name | +| `host` | String | The Databricks workspace host URL | +| `profile` | String | The Databricks workspace profile name | +| `resource_path` | String | The workspace resource path | +| `root_path` | String | The Databricks workspace root path | +| `state_path` | String | The workspace state path | + +## variables +A Map that defines the custom variables for the bundle, where each key is the name of the variable, and the value is a Map that defines the variable. 
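For illustration, a sketch of top-level variable definitions, including a `lookup` variable that resolves a named object to its ID; the names shown are hypothetical:

```yaml
variables:
  default_catalog:
    description: The default catalog name.
    default: main
  warehouse_id:
    description: The SQL warehouse whose ID should be looked up.
    lookup:
      warehouse: "My Warehouse"
```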
+| Key | Type | Description |
|----------|------|----------------------------|
| `` | Map | The definition of the item |

Each item has the following attributes:
| Key | Type | Description |
|---------------|--------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `default` | Any | - |
| `description` | String | The description of the variable. |
| `lookup` | Map | The name of the `alert`, `cluster_policy`, `cluster`, `dashboard`, `instance_pool`, `job`, `metastore`, `pipeline`, `query`, `service_principal`, or `warehouse` object for which to retrieve an ID. |
| `type` | String | The type of the variable. |

### variables.lookup
The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID.
#### Attributes
| Key | Type | Description |
|----------------------------|--------|-------------|
| `alert` | String | - |
| `cluster` | String | - |
| `cluster_policy` | String | - |
| `dashboard` | String | - |
| `instance_pool` | String | - |
| `job` | String | - |
| `metastore` | String | - |
| `notification_destination` | String | - |
| `pipeline` | String | - |
| `query` | String | - |
| `service_principal` | String | - |
| `warehouse` | String | - |

## workspace
Defines the Databricks workspace for the bundle.
#### Attributes
| Key | Type | Description |
|-------------------------------|---------|--------------------------------------------------------------------------------------|
| `artifact_path` | String | The artifact path to use within the workspace for both deployments and workflow runs |
| `auth_type` | String | The authentication type. 
| +| `azure_client_id` | String | The Azure client ID | +| `azure_environment` | String | The Azure environment | +| `azure_login_app_id` | String | The Azure login app ID | +| `azure_tenant_id` | String | The Azure tenant ID | +| `azure_use_msi` | Boolean | Whether to use MSI for Azure | +| `azure_workspace_resource_id` | String | The Azure workspace resource ID | +| `client_id` | String | The client ID for the workspace | +| `file_path` | String | The file path to use within the workspace for both deployments and workflow runs | +| `google_service_account` | String | The Google service account name | +| `host` | String | The Databricks workspace host URL | +| `profile` | String | The Databricks workspace profile name | +| `resource_path` | String | The workspace resource path | +| `root_path` | String | The Databricks workspace root path | +| `state_path` | String | The workspace state path | diff --git a/bundle/internal/docs/main.go b/bundle/internal/docs/main.go new file mode 100644 index 0000000000..5c033d84fc --- /dev/null +++ b/bundle/internal/docs/main.go @@ -0,0 +1,183 @@ +package main + +import ( + "bytes" + "fmt" + "log" + "os" + "path/filepath" + "reflect" + "strings" + + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/cli/libs/dyn/merge" + "github.com/databricks/cli/libs/dyn/yamlloader" + "github.com/databricks/cli/libs/jsonschema" + "github.com/databricks/databricks-sdk-go/service/jobs" +) + +const Placeholder = "PLACEHOLDER" + +func removeJobsFields(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { + switch typ { + case reflect.TypeOf(resources.Job{}): + // This field has been deprecated in jobs API v2.1 and is always set to + // "MULTI_TASK" in the backend. We should not expose it to the user. + delete(s.Properties, "format") + + // These fields are only meant to be set by the DABs client (ie the CLI) + // and thus should not be exposed to the user. These are used to annotate + // jobs that were created by DABs. + delete(s.Properties, "deployment") + delete(s.Properties, "edit_mode") + + case reflect.TypeOf(jobs.GitSource{}): + // These fields are readonly and are not meant to be set by the user. + delete(s.Properties, "job_source") + delete(s.Properties, "git_snapshot") + + default: + // Do nothing + } + + return s +} + +// While volume_type is required in the volume create API, DABs automatically sets +// it's value to "MANAGED" if it's not provided. Thus, we make it optional +// in the bundle schema. 
+func makeVolumeTypeOptional(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { + if typ != reflect.TypeOf(resources.Volume{}) { + return s + } + + res := []string{} + for _, r := range s.Required { + if r != "volume_type" { + res = append(res, r) + } + } + s.Required = res + return s +} + +func main() { + if len(os.Args) != 3 { + fmt.Println("Usage: go run main.go ") + os.Exit(1) + } + + annotationFile := os.Args[1] + outputFile := os.Args[2] + + err := generateDocs(annotationFile, outputFile) + if err != nil { + log.Fatal(err) + } +} + +type annotationFile map[string]map[string]annotation + +type annotation struct { + Description string `json:"description,omitempty"` + MarkdownDescription string `json:"markdown_description,omitempty"` + Title string `json:"title,omitempty"` + Default any `json:"default,omitempty"` + Enum []any `json:"enum,omitempty"` +} + +func generateDocs(workdir, outputPath string) error { + annotationsPath := filepath.Join(workdir, "annotations.yml") + annotationsOpenApiPath := filepath.Join(workdir, "annotations_openapi.yml") + annotationsOpenApiOverridesPath := filepath.Join(workdir, "annotations_openapi_overrides.yml") + + annotations, err := LoadAndMergeAnnotations([]string{annotationsPath, annotationsOpenApiPath, annotationsOpenApiOverridesPath}) + if err != nil { + log.Fatal(err) + } + + schemas := map[string]jsonschema.Schema{} + + s, err := jsonschema.FromType(reflect.TypeOf(config.Root{}), []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{ + removeJobsFields, + makeVolumeTypeOptional, + + func(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { + schemas[jsonschema.TypePath(typ)] = s + + refPath := getPath(typ) + shouldHandle := strings.HasPrefix(refPath, "github.com") + if !shouldHandle { + return s + } + + a := annotations[refPath] + if a == nil { + a = map[string]annotation{} + } + + rootTypeAnnotation, ok := a["_"] + if ok { + assignAnnotation(&s, rootTypeAnnotation) + } + + for k, v := range s.Properties { + assignAnnotation(v, a[k]) + } + + return s + }, + }) + if err != nil { + log.Fatal(err) + } + + nodes := getNodes(s, schemas, annotations) + err = buildMarkdown(nodes, outputPath) + if err != nil { + log.Fatal(err) + } + return nil +} + +func getPath(typ reflect.Type) string { + return typ.PkgPath() + "." 
+ typ.Name() +} + +func assignAnnotation(s *jsonschema.Schema, a annotation) { + if a.Description != "" && a.Description != Placeholder { + s.Description = a.Description + } + if a.MarkdownDescription != "" { + s.MarkdownDescription = a.MarkdownDescription + } +} + +func LoadAndMergeAnnotations(sources []string) (annotationFile, error) { + prev := dyn.NilValue + for _, path := range sources { + b, err := os.ReadFile(path) + if err != nil { + return nil, err + } + generated, err := yamlloader.LoadYAML(path, bytes.NewBuffer(b)) + if err != nil { + return nil, err + } + prev, err = merge.Merge(prev, generated) + if err != nil { + return nil, err + } + } + + var data annotationFile + + err := convert.ToTyped(&data, prev) + if err != nil { + return nil, err + } + return data, nil +} diff --git a/go.mod b/go.mod index c9a008fb3e..d999344229 100644 --- a/go.mod +++ b/go.mod @@ -53,8 +53,13 @@ require ( github.com/hashicorp/go-cleanhttp v0.5.2 // indirect github.com/hashicorp/go-retryablehttp v0.7.7 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/karrick/godirwalk v1.17.0 // indirect github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-runewidth v0.0.9 // indirect + github.com/nao1215/markdown v0.6.0 // indirect + github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/radovskyb/watcher v1.0.7 // indirect github.com/stretchr/objx v0.5.2 // indirect github.com/zclconf/go-cty v1.15.0 // indirect go.opencensus.io v0.24.0 // indirect diff --git a/go.sum b/go.sum index 63bf2be333..e0b58c4f14 100644 --- a/go.sum +++ b/go.sum @@ -115,6 +115,8 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo= +github.com/karrick/godirwalk v1.17.0 h1:b4kY7nqDdioR/6qnbHQyDvmA17u5G1cZ6J+CZXwSWoI= +github.com/karrick/godirwalk v1.17.0/go.mod h1:j4mkqPuvaLI8mp1DroR3P6ad7cyYd4c1qeJ3RV7ULlk= github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM= github.com/manifoldco/promptui v0.9.0 h1:3V4HzJk1TtXW1MTZMP7mdlwbBpIinw3HztaIlYthEiA= @@ -127,8 +129,14 @@ github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOA github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0= +github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= +github.com/nao1215/markdown v0.6.0 h1:kqhrC47K434YA1jMTUwJwSV/hla8ifN3NzehMEffI/E= +github.com/nao1215/markdown v0.6.0/go.mod h1:ObBhnNduWwPN+bu4dtv4JoLRt57ONla7l//03iHIVhY= github.com/nwidger/jsoncolor v0.3.2 h1:rVJJlwAWDJShnbTYOQ5RM7yTA20INyKXlJ/fg4JMhHQ= github.com/nwidger/jsoncolor v0.3.2/go.mod h1:Cs34umxLbJvgBMnVNVqhji9BhoT/N/KinHqZptQ7cf4= +github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= +github.com/olekukonko/tablewriter 
v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/pjbgf/sha1cd v0.3.0 h1:4D5XXmUUBUl/xQ6IjCkEAbqXskkq/4O7LmGn0AqMDs4= github.com/pjbgf/sha1cd v0.3.0/go.mod h1:nZ1rrWOcGJ5uZgEEVL1VUM9iRQiZvWdbZjkKyFzPPsI= github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 h1:KoWmjvw+nsYOo29YJK9vDA65RGE3NrOnUtO7a+RF9HU= @@ -136,6 +144,8 @@ github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8/go.mod h1:HKlIX3XHQyzL github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/radovskyb/watcher v1.0.7 h1:AYePLih6dpmS32vlHfhCeli8127LzkIgwJGcwwe8tUE= +github.com/radovskyb/watcher v1.0.7/go.mod h1:78okwvY5wPdzcb1UYnip1pvrZNIVEIh/Cm+ZuvsUYIg= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs= diff --git a/libs/jsonschema/from_type.go b/libs/jsonschema/from_type.go index 18a2b3ba5a..238872f128 100644 --- a/libs/jsonschema/from_type.go +++ b/libs/jsonschema/from_type.go @@ -111,6 +111,10 @@ func FromType(typ reflect.Type, fns []func(typ reflect.Type, s Schema) Schema) ( return res, nil } +func TypePath(typ reflect.Type) string { + return typePath(typ) +} + // typePath computes a unique string representation of the type. $ref in the generated // JSON schema will refer to this path. See TestTypePath for examples outputs. 
func typePath(typ reflect.Type) string { From e27cc9a686dcb84694e4891ad6250e6deb2e68e8 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Wed, 18 Dec 2024 19:20:13 +0100 Subject: [PATCH 02/26] fix: Custom table markup changes --- bundle/internal/docs/docs.go | 18 +- bundle/internal/docs/docs.md | 6476 +++++++++++++++++++++++++++------- 2 files changed, 5149 insertions(+), 1345 deletions(-) diff --git a/bundle/internal/docs/docs.go b/bundle/internal/docs/docs.go index 23742a38d2..10486a8d2f 100644 --- a/bundle/internal/docs/docs.go +++ b/bundle/internal/docs/docs.go @@ -90,25 +90,29 @@ func buildMarkdown(nodes []rootNode, outputFile string) error { m := md.NewMarkdown(f) for _, node := range nodes { + m = m.LF() if node.TopLevel { m = m.H2(node.Title) } else { m = m.H3(node.Title) } m = m.PlainText(node.Description) + m = m.LF() if len(node.ObjectKeyAttributes) > 0 { m = buildAttributeTable(m, []attributeNode{ {Title: AdditionalPropertiesAttributeTitle, Type: "Map", Description: AdditionalPropertiesAttributeDescription}, }) m = m.PlainText("Each item has the following attributes:") + m = m.LF() m = buildAttributeTable(m, node.ObjectKeyAttributes) - } else if len(node.ArrayItemAttributes) > 0 { - m = m.PlainText(fmt.Sprintf("Each item of `%s` has the following attributes:", node.Title)) + m = m.PlainTextf("Each item of `%s` has the following attributes:", node.Title) + m = m.LF() m = buildAttributeTable(m, node.ArrayItemAttributes) } else if len(node.Attributes) > 0 { m = m.H4("Attributes") + m = m.LF() m = buildAttributeTable(m, node.Attributes) } } @@ -122,6 +126,7 @@ func buildMarkdown(nodes []rootNode, outputFile string) error { } func buildAttributeTable(m *md.Markdown, attributes []attributeNode) *md.Markdown { + return buildCustomAttributeTable(m, attributes) rows := [][]string{} for _, n := range attributes { rows = append(rows, []string{fmt.Sprintf("`%s`", n.Title), n.Type, formatDescription(n.Description)}) @@ -135,25 +140,26 @@ func buildAttributeTable(m *md.Markdown, attributes []attributeNode) *md.Markdow } func formatDescription(s string) string { - if s == "" { - return "-" - } return strings.ReplaceAll(s, "\n", " ") } // Build a custom table which we use in Databricks website func buildCustomAttributeTable(m *md.Markdown, attributes []attributeNode) *md.Markdown { + m = m.LF() m = m.PlainText(".. list-table::") m = m.PlainText(" :header-rows: 1") + m = m.LF() m = m.PlainText(" * - Key") m = m.PlainText(" - Type") m = m.PlainText(" - Description") + m = m.LF() for _, a := range attributes { m = m.PlainText(" * - " + a.Title) m = m.PlainText(" - " + a.Type) - m = m.PlainText(" - " + a.Description) + m = m.PlainText(" - " + formatDescription(a.Description)) + m = m.LF() } return m } diff --git a/bundle/internal/docs/docs.md b/bundle/internal/docs/docs.md index 006b1727ed..86ffa078d0 100644 --- a/bundle/internal/docs/docs.md +++ b/bundle/internal/docs/docs.md @@ -1,238 +1,711 @@ + ## artifacts Defines the attributes to build an artifact -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|--------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `build` | String | An optional set of non-default build commands that you want to run locally before deployment. For Python wheel builds, the Databricks CLI assumes that it can find a local install of the Python wheel package to run builds, and it runs the command python setup.py bdist_wheel by default during each bundle deployment. To specify multiple build commands, separate each command with double-ampersand (&&) characters. | -| `executable` | String | The executable type. | -| `files` | Sequence | The source files for the artifact, defined as an [_](#artifact_file). | -| `path` | String | The location where the built artifact will be saved. | -| `type` | String | The type of the artifact. Valid values are `wheel` or `jar` | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - build + - String + - An optional set of non-default build commands that you want to run locally before deployment. For Python wheel builds, the Databricks CLI assumes that it can find a local install of the Python wheel package to run builds, and it runs the command python setup.py bdist_wheel by default during each bundle deployment. To specify multiple build commands, separate each command with double-ampersand (&&) characters. + + * - executable + - String + - The executable type. + + * - files + - Sequence + - The source files for the artifact, defined as an [_](#artifact_file). + + * - path + - String + - The location where the built artifact will be saved. + + * - type + - String + - The type of the artifact. Valid values are `wheel` or `jar` + + ## bundle The attributes of the bundle. See [_](/dev-tools/bundles/settings.md#bundle) + #### Attributes -| Key | Type | Description | -|--------------------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `cluster_id` | String | The ID of a cluster to use to run the bundle. See [_](/dev-tools/bundles/settings.md#cluster_id). | -| `compute_id` | String | - | -| `databricks_cli_version` | String | The Databricks CLI version to use for the bundle. See [_](/dev-tools/bundles/settings.md#databricks_cli_version). | -| `deployment` | Map | The definition of the bundle deployment. For supported attributes, see [_](#deployment) and [_](/dev-tools/bundles/deployment-modes.md). | -| `git` | Map | The Git version control details that are associated with your bundle. For supported attributes, see [_](#git) and [_](/dev-tools/bundles/settings.md#git). | -| `name` | String | The name of the bundle. | -| `uuid` | String | - | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - cluster_id + - String + - The ID of a cluster to use to run the bundle. See [_](/dev-tools/bundles/settings.md#cluster_id). 
+ + * - compute_id + - String + - + + * - databricks_cli_version + - String + - The Databricks CLI version to use for the bundle. See [_](/dev-tools/bundles/settings.md#databricks_cli_version). + + * - deployment + - Map + - The definition of the bundle deployment. For supported attributes, see [_](#deployment) and [_](/dev-tools/bundles/deployment-modes.md). + + * - git + - Map + - The Git version control details that are associated with your bundle. For supported attributes, see [_](#git) and [_](/dev-tools/bundles/settings.md#git). + + * - name + - String + - The name of the bundle. + + * - uuid + - String + - + + ### bundle.deployment The definition of the bundle deployment + #### Attributes -| Key | Type | Description | -|-----------------------|---------|---------------------------------------------------------------------------------------------------------| -| `fail_on_active_runs` | Boolean | Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. | -| `lock` | Map | The deployment lock attributes. See [_](#lock). | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - fail_on_active_runs + - Boolean + - Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. + + * - lock + - Map + - The deployment lock attributes. See [_](#lock). + + ### bundle.deployment.lock The deployment lock attributes. + #### Attributes -| Key | Type | Description | -|-----------|---------|----------------------------------------------| -| `enabled` | Boolean | Whether this lock is enabled. | -| `force` | Boolean | Whether to force this lock if it is enabled. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - enabled + - Boolean + - Whether this lock is enabled. + + * - force + - Boolean + - Whether to force this lock if it is enabled. + + ### bundle.git The Git version control details that are associated with your bundle. + #### Attributes -| Key | Type | Description | -|--------------|--------|--------------------------------------------------------------------------------| -| `branch` | String | The Git branch name. See [_](/dev-tools/bundles/settings.md#git). | -| `origin_url` | String | The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git). | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - branch + - String + - The Git branch name. See [_](/dev-tools/bundles/settings.md#git). + + * - origin_url + - String + - The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git). + + ## experimental Defines attributes for experimental features. + #### Attributes -| Key | Type | Description | -|------------------------|---------|-------------------------------------------| -| `pydabs` | Map | The PyDABs configuration. | -| `python_wheel_wrapper` | Boolean | Whether to use a Python wheel wrapper | -| `scripts` | Map | The commands to run | -| `use_legacy_run_as` | Boolean | Whether to use the legacy run_as behavior | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - pydabs + - Map + - The PyDABs configuration. + + * - python_wheel_wrapper + - Boolean + - Whether to use a Python wheel wrapper + + * - scripts + - Map + - The commands to run + + * - use_legacy_run_as + - Boolean + - Whether to use the legacy run_as behavior + + ### experimental.pydabs The PyDABs configuration. 
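For illustration, a sketch of a `pydabs` block using the attributes documented below; the module path is hypothetical:

```yaml
experimental:
  pydabs:
    enabled: true
    venv_path: .venv
    import:
      - my_project.resources
```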
+ #### Attributes -| Key | Type | Description | -|-------------|----------|-------------------------------------------------------------------------------------| -| `enabled` | Boolean | Whether or not PyDABs (Private Preview) is enabled | -| `import` | Sequence | The PyDABs project to import to discover resources, resource generator and mutators | -| `venv_path` | String | The Python virtual environment path | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - enabled + - Boolean + - Whether or not PyDABs (Private Preview) is enabled + + * - import + - Sequence + - The PyDABs project to import to discover resources, resource generator and mutators + + * - venv_path + - String + - The Python virtual environment path + + ### experimental.scripts The commands to run + + ## include Specifies a list of path globs that contain configuration files to include within the bundle. See [_](/dev-tools/bundles/settings.md#include) + + ## permissions Defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle. See [_](/dev-tools/bundles/settings.md#permissions) and [_](/dev-tools/bundles/permissions.md). + Each item of `permissions` has the following attributes: -| Key | Type | Description | -|--------------------------|--------|----------------------------------------------------------------------------------------| -| `group_name` | String | The name of the group that has the permission set in level. | -| `level` | String | The allowed permission for user, group, service principal defined for this permission. | -| `service_principal_name` | String | The name of the service principal that has the permission set in level. | -| `user_name` | String | The name of the user that has the permission set in level. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - group_name + - String + - The name of the group that has the permission set in level. + + * - level + - String + - The allowed permission for user, group, service principal defined for this permission. + + * - service_principal_name + - String + - The name of the service principal that has the permission set in level. + + * - user_name + - String + - The name of the user that has the permission set in level. + + ## presets Defines bundle deployment presets. See [_](/dev-tools/bundles/deployment-modes.md#presets). + #### Attributes -| Key | Type | Description | -|----------------------------|---------|-------------------------------------------------------------------------------------------------| -| `jobs_max_concurrent_runs` | Integer | The maximum concurrent runs for a job. | -| `name_prefix` | String | The prefix for job runs of the bundle. | -| `pipelines_development` | Boolean | Whether pipeline deployments should be locked in development mode. | -| `source_linked_deployment` | Boolean | Whether to link the deployment to the bundle source. | -| `tags` | Map | The tags for the bundle deployment. | -| `trigger_pause_status` | String | A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - jobs_max_concurrent_runs + - Integer + - The maximum concurrent runs for a job. + + * - name_prefix + - String + - The prefix for job runs of the bundle. + + * - pipelines_development + - Boolean + - Whether pipeline deployments should be locked in development mode. 
+ + * - source_linked_deployment + - Boolean + - Whether to link the deployment to the bundle source. + + * - tags + - Map + - The tags for the bundle deployment. + + * - trigger_pause_status + - String + - A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED. + + ### presets.tags The tags for the bundle deployment. + + ## resources Specifies information about the Databricks resources used by the bundle. See [_](/dev-tools/bundles/resources.md). + #### Attributes -| Key | Type | Description | -|---------------------------|------|------------------------------------------------------------------------------------------------------------------------| -| `clusters` | Map | The cluster definitions for the bundle. See [_](/dev-tools/bundles/resources.md#cluster) | -| `dashboards` | Map | The dashboard definitions for the bundle. See [_](/dev-tools/bundles/resources.md#dashboard) | -| `experiments` | Map | The experiment definitions for the bundle. See [_](/dev-tools/bundles/resources.md#experiment) | -| `jobs` | Map | The job definitions for the bundle. See [_](/dev-tools/bundles/resources.md#job) | -| `model_serving_endpoints` | Map | The model serving endpoint definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model_serving_endpoint) | -| `models` | Map | The model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model) | -| `pipelines` | Map | The pipeline definitions for the bundle. See [_](/dev-tools/bundles/resources.md#pipeline) | -| `quality_monitors` | Map | The quality monitor definitions for the bundle. See [_](/dev-tools/bundles/resources.md#quality_monitor) | -| `registered_models` | Map | The registered model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#registered_model) | -| `schemas` | Map | The schema definitions for the bundle. See [_](/dev-tools/bundles/resources.md#schema) | -| `volumes` | Map | - | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - clusters + - Map + - The cluster definitions for the bundle. See [_](/dev-tools/bundles/resources.md#cluster) + + * - dashboards + - Map + - The dashboard definitions for the bundle. See [_](/dev-tools/bundles/resources.md#dashboard) + + * - experiments + - Map + - The experiment definitions for the bundle. See [_](/dev-tools/bundles/resources.md#experiment) + + * - jobs + - Map + - The job definitions for the bundle. See [_](/dev-tools/bundles/resources.md#job) + + * - model_serving_endpoints + - Map + - The model serving endpoint definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model_serving_endpoint) + + * - models + - Map + - The model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model) + + * - pipelines + - Map + - The pipeline definitions for the bundle. See [_](/dev-tools/bundles/resources.md#pipeline) + + * - quality_monitors + - Map + - The quality monitor definitions for the bundle. See [_](/dev-tools/bundles/resources.md#quality_monitor) + + * - registered_models + - Map + - The registered model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#registered_model) + + * - schemas + - Map + - The schema definitions for the bundle. See [_](/dev-tools/bundles/resources.md#schema) + + * - volumes + - Map + - + + ### resources.clusters The cluster definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|--------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `apply_policy_default_values` | Boolean | When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. | -| `autoscale` | Map | Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. | -| `autotermination_minutes` | Integer | Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. | -| `aws_attributes` | Map | Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. | -| `azure_attributes` | Map | Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. | -| `cluster_log_conf` | Map | The configuration for delivering spark logs to a long-term storage destination. Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. | -| `cluster_name` | String | Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. | -| `custom_tags` | Map | Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags | -| `data_security_mode` | String | - | -| `docker_image` | Map | - | -| `driver_instance_pool_id` | String | The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. | -| `driver_node_type_id` | String | The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. 
| -| `enable_elastic_disk` | Boolean | Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to the User Guide for more details. | -| `enable_local_disk_encryption` | Boolean | Whether to enable LUKS on cluster VMs' local disks | -| `gcp_attributes` | Map | Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. | -| `init_scripts` | Sequence | The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. | -| `instance_pool_id` | String | The optional ID of the instance pool to which the cluster belongs. | -| `node_type_id` | String | This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. | -| `num_workers` | Integer | Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. | -| `permissions` | Sequence | - | -| `policy_id` | String | The ID of the cluster policy used to create the cluster if applicable. | -| `runtime_engine` | String | - | -| `single_user_name` | String | Single user name if data_security_mode is `SINGLE_USER` | -| `spark_conf` | Map | An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. | -| `spark_env_vars` | Map | An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` | -| `spark_version` | String | The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. | -| `ssh_public_keys` | Sequence | SSH public key contents that will be added to each Spark node in this cluster. 
The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. | -| `workload_type` | Map | - | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - apply_policy_default_values + - Boolean + - When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. + + * - autoscale + - Map + - Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. + + * - autotermination_minutes + - Integer + - Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. + + * - aws_attributes + - Map + - Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. + + * - azure_attributes + - Map + - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. + + * - cluster_log_conf + - Map + - The configuration for delivering spark logs to a long-term storage destination. Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. + + * - cluster_name + - String + - Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. + + * - custom_tags + - Map + - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + + * - data_security_mode + - String + - + + * - docker_image + - Map + - + + * - driver_instance_pool_id + - String + - The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. + + * - driver_node_type_id + - String + - The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. + + * - enable_elastic_disk + - Boolean + - Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to the User Guide for more details. + + * - enable_local_disk_encryption + - Boolean + - Whether to enable LUKS on cluster VMs' local disks + + * - gcp_attributes + - Map + - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. + + * - init_scripts + - Sequence + - The configuration for storing init scripts. Any number of destinations can be specified. 
The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. + + * - instance_pool_id + - String + - The optional ID of the instance pool to which the cluster belongs. + + * - node_type_id + - String + - This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. + + * - num_workers + - Integer + - Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. + + * - permissions + - Sequence + - + + * - policy_id + - String + - The ID of the cluster policy used to create the cluster if applicable. + + * - runtime_engine + - String + - + + * - single_user_name + - String + - Single user name if data_security_mode is `SINGLE_USER` + + * - spark_conf + - Map + - An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. + + * - spark_env_vars + - Map + - An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` + + * - spark_version + - String + - The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. + + * - ssh_public_keys + - Sequence + - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. + + * - workload_type + - Map + - + + ### resources.clusters.autoscale Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. + #### Attributes -| Key | Type | Description | -|---------------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `max_workers` | Integer | The maximum number of workers to which the cluster can scale up when overloaded. 
Note that `max_workers` must be strictly greater than `min_workers`. | -| `min_workers` | Integer | The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - max_workers + - Integer + - The maximum number of workers to which the cluster can scale up when overloaded. Note that `max_workers` must be strictly greater than `min_workers`. + + * - min_workers + - Integer + - The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. + + ### resources.clusters.aws_attributes Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. + #### Attributes -| Key | Type | Description | -|--------------------------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `availability` | String | - | -| `ebs_volume_count` | Integer | The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogenously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. | -| `ebs_volume_iops` | Integer | If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. | -| `ebs_volume_size` | Integer | The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. | -| `ebs_volume_throughput` | Integer | If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. 
| -| `ebs_volume_type` | String | - | -| `first_on_demand` | Integer | The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. | -| `instance_profile_arn` | String | Nodes for this cluster will only be placed on AWS instances with this instance profile. If ommitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans. If this field is ommitted, we will pull in the default from the conf if it exists. | -| `spot_bid_price_percent` | Integer | The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. The default value and documentation here should be kept consistent with CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. | -| `zone_id` | String | Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - availability + - String + - + + * - ebs_volume_count + - Integer + - The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogenously sized scratch devices can lead to inefficient disk utilization. 
If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. + + * - ebs_volume_iops + - Integer + - If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. + + * - ebs_volume_size + - Integer + - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. + + * - ebs_volume_throughput + - Integer + - If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. + + * - ebs_volume_type + - String + - + + * - first_on_demand + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + + * - instance_profile_arn + - String + - Nodes for this cluster will only be placed on AWS instances with this instance profile. If ommitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans. If this field is ommitted, we will pull in the default from the conf if it exists. + + * - spot_bid_price_percent + - Integer + - The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. The default value and documentation here should be kept consistent with CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. + + * - zone_id + - String + - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. 
The list of available zones as well as the default value can be found by using the `List Zones` method. + + ### resources.clusters.azure_attributes Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. + #### Attributes -| Key | Type | Description | -|----------------------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `availability` | String | - | -| `first_on_demand` | Integer | The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. | -| `log_analytics_info` | Map | Defines values necessary to configure and run Azure Log Analytics agent | -| `spot_bid_max_price` | Any | The max bid price to be used for Azure spot instances. The Max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should > 0 or -1. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - availability + - String + - + + * - first_on_demand + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + + * - log_analytics_info + - Map + - Defines values necessary to configure and run Azure Log Analytics agent + + * - spot_bid_max_price + - Any + - The max bid price to be used for Azure spot instances. The Max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should > 0 or -1. 
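To give a sense of how the cluster attributes documented above compose in a bundle configuration, here is a minimal, hypothetical `databricks.yml` sketch that combines `autoscale` with `azure_attributes`. The resource name `my_cluster` and all values are illustrative placeholders, not defaults; only the field names come from the tables above.

```yaml
resources:
  clusters:
    my_cluster:
      spark_version: 15.4.x-scala2.12     # illustrative runtime version
      node_type_id: Standard_DS3_v2       # illustrative Azure node type
      autoscale:
        min_workers: 1
        max_workers: 4                    # must be strictly greater than min_workers
      azure_attributes:
        first_on_demand: 1                # keep the driver on an on-demand instance
        spot_bid_max_price: -1            # -1: evict only on availability, never on price
```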
+ + ### resources.clusters.azure_attributes.log_analytics_info Defines values necessary to configure and run Azure Log Analytics agent + #### Attributes -| Key | Type | Description | -|------------------------------|--------|-----------------------| -| `log_analytics_primary_key` | String | | -| `log_analytics_workspace_id` | String | | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - log_analytics_primary_key + - String + - + + * - log_analytics_workspace_id + - String + - + + ### resources.clusters.cluster_log_conf The configuration for delivering spark logs to a long-term storage destination. Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. + #### Attributes -| Key | Type | Description | -|--------|------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `dbfs` | Map | destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` | -| `s3` | Map | destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - dbfs + - Map + - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + + * - s3 + - Map + - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. + + ### resources.clusters.cluster_log_conf.dbfs destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + #### Attributes -| Key | Type | Description | -|---------------|--------|----------------------------------------| -| `destination` | String | dbfs destination, e.g. `dbfs:/my/path` | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - destination + - String + - dbfs destination, e.g. `dbfs:/my/path` + + ### resources.clusters.cluster_log_conf.s3 destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. 
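As a rough sketch of how this destination is configured in practice, the fragment below reuses the example values from the description above (`s3://cluster_log_bucket/prefix`, `us-west-2`) inside a hypothetical cluster definition; the resource name and the encryption settings are illustrative assumptions.

```yaml
resources:
  clusters:
    my_cluster:
      # ... other cluster attributes ...
      cluster_log_conf:
        s3:
          destination: s3://cluster_log_bucket/prefix
          region: us-west-2                        # either region or endpoint must be set
          enable_encryption: true                  # optional, defaults to false
          canned_acl: bucket-owner-full-control    # requires s3:PutObjectAcl on the bucket
```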
+ #### Attributes -| Key | Type | Description | -|---------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `canned_acl` | String | (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. | -| `destination` | String | S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. | -| `enable_encryption` | Boolean | (Optional) Flag to enable server side encryption, `false` by default. | -| `encryption_type` | String | (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. | -| `endpoint` | String | S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. | -| `kms_key` | String | (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. | -| `region` | String | S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - canned_acl + - String + - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. + + * - destination + - String + - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. + + * - enable_encryption + - Boolean + - (Optional) Flag to enable server side encryption, `false` by default. + + * - encryption_type + - String + - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. 
It will be used only when encryption is enabled and the default type is `sse-s3`. + + * - endpoint + - String + - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + + * - kms_key + - String + - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + + * - region + - String + - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + + ### resources.clusters.custom_tags Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: @@ -240,40 +713,98 @@ instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + + ### resources.clusters.docker_image + #### Attributes -| Key | Type | Description | -|--------------|--------|--------------------------| -| `basic_auth` | Map | - | -| `url` | String | URL of the docker image. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - basic_auth + - Map + - + + * - url + - String + - URL of the docker image. + + ### resources.clusters.docker_image.basic_auth + #### Attributes -| Key | Type | Description | -|------------|--------|----------------------| -| `password` | String | Password of the user | -| `username` | String | Name of the user | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - password + - String + - Password of the user + + * - username + - String + - Name of the user + + ### resources.clusters.gcp_attributes Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. + #### Attributes -| Key | Type | Description | -|-----------------------------|---------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `availability` | String | - | -| `boot_disk_size` | Integer | boot disk size in GB | -| `google_service_account` | String | If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). The google service account must have previously been added to the Databricks environment by an account administrator. | -| `local_ssd_count` | Integer | If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. | -| `use_preemptible_executors` | Boolean | This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default). Note: Soon to be deprecated, use the availability field instead. 
| -| `zone_id` | String | Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default] - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - availability + - String + - + + * - boot_disk_size + - Integer + - boot disk size in GB + + * - google_service_account + - String + - If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). The google service account must have previously been added to the Databricks environment by an account administrator. + + * - local_ssd_count + - Integer + - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. + + * - use_preemptible_executors + - Boolean + - This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default). Note: Soon to be deprecated, use the availability field instead. + + * - zone_id + - String + - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default] - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. + + ### resources.clusters.spark_conf An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. + + ### resources.clusters.spark_env_vars An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., @@ -286,855 +817,2573 @@ default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` + + ### resources.clusters.workload_type + #### Attributes -| Key | Type | Description | -|-----------|------|-------------------------------------------------------------------------| -| `clients` | Map | defined what type of clients can use the cluster. E.g. Notebooks, Jobs | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - clients + - Map + - defined what type of clients can use the cluster. E.g. Notebooks, Jobs + + ### resources.clusters.workload_type.clients defined what type of clients can use the cluster. E.g. 
Notebooks, Jobs + #### Attributes -| Key | Type | Description | -|-------------|---------|------------------------------------------------------------| -| `jobs` | Boolean | With jobs set, the cluster can be used for jobs | -| `notebooks` | Boolean | With notebooks set, this cluster can be used for notebooks | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - jobs + - Boolean + - With jobs set, the cluster can be used for jobs + + * - notebooks + - Boolean + - With notebooks set, this cluster can be used for notebooks + + ### resources.dashboards The dashboard definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|------------------------|----------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `create_time` | String | The timestamp of when the dashboard was created. | -| `dashboard_id` | String | UUID identifying the dashboard. | -| `display_name` | String | The display name of the dashboard. | -| `embed_credentials` | Boolean | - | -| `etag` | String | The etag for the dashboard. Can be optionally provided on updates to ensure that the dashboard has not been modified since the last read. This field is excluded in List Dashboards responses. | -| `file_path` | String | - | -| `lifecycle_state` | String | The state of the dashboard resource. Used for tracking trashed status. | -| `parent_path` | String | The workspace path of the folder containing the dashboard. Includes leading slash and no trailing slash. This field is excluded in List Dashboards responses. | -| `path` | String | The workspace path of the dashboard asset, including the file name. Exported dashboards always have the file extension `.lvdash.json`. This field is excluded in List Dashboards responses. | -| `permissions` | Sequence | - | -| `serialized_dashboard` | Any | The contents of the dashboard in serialized string form. This field is excluded in List Dashboards responses. Use the [get dashboard API](https://docs.databricks.com/api/workspace/lakeview/get) to retrieve an example response, which includes the `serialized_dashboard` field. This field provides the structure of the JSON string that represents the dashboard's layout and components. | -| `update_time` | String | The timestamp of when the dashboard was last updated by the user. This field is excluded in List Dashboards responses. | -| `warehouse_id` | String | The warehouse ID used to run the dashboard. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - create_time + - String + - The timestamp of when the dashboard was created. + + * - dashboard_id + - String + - UUID identifying the dashboard. + + * - display_name + - String + - The display name of the dashboard. + + * - embed_credentials + - Boolean + - + + * - etag + - String + - The etag for the dashboard. 
Can be optionally provided on updates to ensure that the dashboard has not been modified since the last read. This field is excluded in List Dashboards responses. + + * - file_path + - String + - + + * - lifecycle_state + - String + - The state of the dashboard resource. Used for tracking trashed status. + + * - parent_path + - String + - The workspace path of the folder containing the dashboard. Includes leading slash and no trailing slash. This field is excluded in List Dashboards responses. + + * - path + - String + - The workspace path of the dashboard asset, including the file name. Exported dashboards always have the file extension `.lvdash.json`. This field is excluded in List Dashboards responses. + + * - permissions + - Sequence + - + + * - serialized_dashboard + - Any + - The contents of the dashboard in serialized string form. This field is excluded in List Dashboards responses. Use the [get dashboard API](https://docs.databricks.com/api/workspace/lakeview/get) to retrieve an example response, which includes the `serialized_dashboard` field. This field provides the structure of the JSON string that represents the dashboard's layout and components. + + * - update_time + - String + - The timestamp of when the dashboard was last updated by the user. This field is excluded in List Dashboards responses. + + * - warehouse_id + - String + - The warehouse ID used to run the dashboard. + + ### resources.experiments The experiment definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|---------------------|----------|------------------------------------------------------------------------------------------------------------------| -| `artifact_location` | String | Location where artifacts for the experiment are stored. | -| `creation_time` | Integer | Creation time | -| `experiment_id` | String | Unique identifier for the experiment. | -| `last_update_time` | Integer | Last update time | -| `lifecycle_stage` | String | Current life cycle stage of the experiment: "active" or "deleted". Deleted experiments are not returned by APIs. | -| `name` | String | Human readable name that identifies the experiment. | -| `permissions` | Sequence | - | -| `tags` | Sequence | Tags: Additional metadata key-value pairs. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - artifact_location + - String + - Location where artifacts for the experiment are stored. + + * - creation_time + - Integer + - Creation time + + * - experiment_id + - String + - Unique identifier for the experiment. + + * - last_update_time + - Integer + - Last update time + + * - lifecycle_stage + - String + - Current life cycle stage of the experiment: "active" or "deleted". Deleted experiments are not returned by APIs. + + * - name + - String + - Human readable name that identifies the experiment. + + * - permissions + - Sequence + - + + * - tags + - Sequence + - Tags: Additional metadata key-value pairs. + + ### resources.jobs The job definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|-------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `budget_policy_id` | String | The id of the user specified budget policy to use for this job. If not specified, a default budget policy may be applied when creating or modifying the job. See `effective_budget_policy_id` for the budget policy used by this workload. | -| `continuous` | Map | An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. | -| `description` | String | An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding. | -| `email_notifications` | Map | An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. | -| `environments` | Sequence | A list of task execution environment specifications that can be referenced by serverless tasks of this job. An environment is required to be present for serverless tasks. For serverless notebook tasks, the environment is accessible in the notebook environment panel. For other serverless tasks, the task environment is required to be specified using environment_key in the task settings. | -| `git_source` | Map | An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. | -| `health` | Map | - | -| `job_clusters` | Sequence | A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. | -| `max_concurrent_runs` | Integer | An optional maximum allowed number of concurrent runs of the job. Set this value if you want to be able to execute multiple runs of the same job concurrently. This is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters. This setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. 
Then setting the concurrency to 3 won’t kill any of the active runs. However, from then on, new runs are skipped unless there are fewer than 3 active runs. This value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped. | -| `name` | String | An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding. | -| `notification_settings` | Map | Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. | -| `parameters` | Sequence | Job-level parameter definitions | -| `permissions` | Sequence | - | -| `queue` | Map | The queue settings of the job. | -| `run_as` | Map | - | -| `schedule` | Map | An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. | -| `tags` | Map | A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. | -| `tasks` | Sequence | A list of task specifications to be executed by this job. | -| `timeout_seconds` | Integer | An optional timeout applied to each run of this job. A value of `0` means no timeout. | -| `trigger` | Map | A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. | -| `webhook_notifications` | Map | A collection of system notification IDs to notify when runs of this job begin or complete. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - budget_policy_id + - String + - The id of the user specified budget policy to use for this job. If not specified, a default budget policy may be applied when creating or modifying the job. See `effective_budget_policy_id` for the budget policy used by this workload. + + * - continuous + - Map + - An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. + + * - description + - String + - An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding. + + * - email_notifications + - Map + - An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. + + * - environments + - Sequence + - A list of task execution environment specifications that can be referenced by serverless tasks of this job. An environment is required to be present for serverless tasks. For serverless notebook tasks, the environment is accessible in the notebook environment panel. For other serverless tasks, the task environment is required to be specified using environment_key in the task settings. + + * - git_source + - Map + - An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. 
+ + * - health + - Map + - + + * - job_clusters + - Sequence + - A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. + + * - max_concurrent_runs + - Integer + - An optional maximum allowed number of concurrent runs of the job. Set this value if you want to be able to execute multiple runs of the same job concurrently. This is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters. This setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs. However, from then on, new runs are skipped unless there are fewer than 3 active runs. This value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped. + + * - name + - String + - An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding. + + * - notification_settings + - Map + - Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. + + * - parameters + - Sequence + - Job-level parameter definitions + + * - permissions + - Sequence + - + + * - queue + - Map + - The queue settings of the job. + + * - run_as + - Map + - + + * - schedule + - Map + - An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + + * - tags + - Map + - A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. + + * - tasks + - Sequence + - A list of task specifications to be executed by this job. + + * - timeout_seconds + - Integer + - An optional timeout applied to each run of this job. A value of `0` means no timeout. + + * - trigger + - Map + - A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + + * - webhook_notifications + - Map + - A collection of system notification IDs to notify when runs of this job begin or complete. + + ### resources.jobs.continuous An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. + #### Attributes -| Key | Type | Description | -|----------------|--------|----------------------------------------------------------------------------------------------| -| `pause_status` | String | Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - pause_status + - String + - Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED. + + ### resources.jobs.email_notifications An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. 
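For orientation, a minimal, hypothetical sketch of these notification settings inside a job definition follows; the job name and email addresses are placeholders, and only the attribute names are taken from the table below.

```yaml
resources:
  jobs:
    my_job:
      # ... tasks, schedule, etc. ...
      email_notifications:
        on_failure:
          - alerts@example.com
        on_success:
          - team@example.com
        no_alert_for_skipped_runs: true
```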
+ #### Attributes -| Key | Type | Description | -|------------------------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `no_alert_for_skipped_runs` | Boolean | If true, do not send email to recipients specified in `on_failure` if the run is skipped. This field is `deprecated`. Please use the `notification_settings.no_alert_for_skipped_runs` field. | -| `on_duration_warning_threshold_exceeded` | Sequence | A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. | -| `on_failure` | Sequence | A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. | -| `on_start` | Sequence | A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. | -| `on_streaming_backlog_exceeded` | Sequence | A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. | -| `on_success` | Sequence | A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - no_alert_for_skipped_runs + - Boolean + - If true, do not send email to recipients specified in `on_failure` if the run is skipped. This field is `deprecated`. Please use the `notification_settings.no_alert_for_skipped_runs` field. + + * - on_duration_warning_threshold_exceeded + - Sequence + - A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. + + * - on_failure + - Sequence + - A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. 
If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. + + * - on_start + - Sequence + - A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. + + * - on_streaming_backlog_exceeded + - Sequence + - A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. + + * - on_success + - Sequence + - A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. + + ### resources.jobs.git_source An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. + #### Attributes -| Key | Type | Description | -|----------------|--------|--------------------------------------------------------------------------------------------------------------------------------------| -| `git_branch` | String | Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit. | -| `git_commit` | String | Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag. | -| `git_provider` | String | Unique identifier of the service used to host the Git repository. The value is case insensitive. | -| `git_tag` | String | Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit. | -| `git_url` | String | URL of the repository to be cloned by this job. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - git_branch + - String + - Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit. + + * - git_commit + - String + - Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag. + + * - git_provider + - String + - Unique identifier of the service used to host the Git repository. The value is case insensitive. + + * - git_tag + - String + - Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit. + + * - git_url + - String + - URL of the repository to be cloned by this job. + + ### resources.jobs.health + #### Attributes -| Key | Type | Description | -|---------|----------|-------------| -| `rules` | Sequence | - | - + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - rules + - Sequence + - + + ### resources.jobs.notification_settings Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. + #### Attributes -| Key | Type | Description | -|------------------------------|---------|----------------------------------------------------------------------------------------------------| -| `no_alert_for_canceled_runs` | Boolean | If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. | -| `no_alert_for_skipped_runs` | Boolean | If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - no_alert_for_canceled_runs + - Boolean + - If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. + + * - no_alert_for_skipped_runs + - Boolean + - If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. + + ### resources.jobs.queue The queue settings of the job. + #### Attributes -| Key | Type | Description | -|-----------|---------|-----------------------------------------------------------------| -| `enabled` | Boolean | If true, enable queueing for the job. This is a required field. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - enabled + - Boolean + - If true, enable queueing for the job. This is a required field. + + ### resources.jobs.run_as + #### Attributes -| Key | Type | Description | -|--------------------------|--------|--------------------------------------------------------------------------------------------------------------| -| `service_principal_name` | String | Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. | -| `user_name` | String | The email of an active workspace user. Non-admin users can only set this field to their own email. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - service_principal_name + - String + - Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. + + * - user_name + - String + - The email of an active workspace user. Non-admin users can only set this field to their own email. + + ### resources.jobs.schedule An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + #### Attributes -| Key | Type | Description | -|--------------------------|--------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `pause_status` | String | Indicate whether this schedule is paused or not. | -| `quartz_cron_expression` | String | A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required. | -| `timezone_id` | String | A Java timezone ID. The schedule for a job is resolved with respect to this timezone. 
See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - pause_status + - String + - Indicate whether this schedule is paused or not. + + * - quartz_cron_expression + - String + - A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required. + + * - timezone_id + - String + - A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required. + + ### resources.jobs.tags A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. + + ### resources.jobs.trigger A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + #### Attributes -| Key | Type | Description | -|----------------|--------|-------------------------------------------------------------------------| -| `file_arrival` | Map | File arrival trigger settings. | -| `pause_status` | String | Whether this trigger is paused or not. | -| `periodic` | Map | Periodic trigger settings. | -| `table` | Map | Old table trigger settings name. Deprecated in favor of `table_update`. | -| `table_update` | Map | - | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - file_arrival + - Map + - File arrival trigger settings. + + * - pause_status + - String + - Whether this trigger is paused or not. + + * - periodic + - Map + - Periodic trigger settings. + + * - table + - Map + - Old table trigger settings name. Deprecated in favor of `table_update`. + + * - table_update + - Map + - + + ### resources.jobs.trigger.file_arrival File arrival trigger settings. + #### Attributes -| Key | Type | Description | -|-------------------------------------|---------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `min_time_between_triggers_seconds` | Integer | If set, the trigger starts a run only after the specified amount of time passed since the last time the trigger fired. The minimum allowed value is 60 seconds | -| `url` | String | URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location. | -| `wait_after_last_change_seconds` | Integer | If set, the trigger starts a run only after no file activity has occurred for the specified amount of time. This makes it possible to wait for a batch of incoming files to arrive before triggering a run. The minimum allowed value is 60 seconds. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - min_time_between_triggers_seconds + - Integer + - If set, the trigger starts a run only after the specified amount of time passed since the last time the trigger fired. The minimum allowed value is 60 seconds + + * - url + - String + - URL to be monitored for file arrivals. 
The path must point to the root or a subpath of the external location. + + * - wait_after_last_change_seconds + - Integer + - If set, the trigger starts a run only after no file activity has occurred for the specified amount of time. This makes it possible to wait for a batch of incoming files to arrive before triggering a run. The minimum allowed value is 60 seconds. + + ### resources.jobs.trigger.periodic Periodic trigger settings. + #### Attributes -| Key | Type | Description | -|------------|---------|-----------------------------------------------| -| `interval` | Integer | The interval at which the trigger should run. | -| `unit` | String | The unit of time for the interval. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - interval + - Integer + - The interval at which the trigger should run. + + * - unit + - String + - The unit of time for the interval. + + ### resources.jobs.trigger.table Old table trigger settings name. Deprecated in favor of `table_update`. + #### Attributes -| Key | Type | Description | -|-------------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `condition` | String | The table(s) condition based on which to trigger a job run. | -| `min_time_between_triggers_seconds` | Integer | If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. | -| `table_names` | Sequence | A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. | -| `wait_after_last_change_seconds` | Integer | If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. The minimum allowed value is 60 seconds. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - condition + - String + - The table(s) condition based on which to trigger a job run. + + * - min_time_between_triggers_seconds + - Integer + - If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. + + * - table_names + - Sequence + - A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. + + * - wait_after_last_change_seconds + - Integer + - If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. The minimum allowed value is 60 seconds. + + ### resources.jobs.trigger.table_update + #### Attributes -| Key | Type | Description | -|-------------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `condition` | String | The table(s) condition based on which to trigger a job run. | -| `min_time_between_triggers_seconds` | Integer | If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. 
The minimum allowed value is 60 seconds. | -| `table_names` | Sequence | A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. | -| `wait_after_last_change_seconds` | Integer | If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. The minimum allowed value is 60 seconds. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - condition + - String + - The table(s) condition based on which to trigger a job run. + + * - min_time_between_triggers_seconds + - Integer + - If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. + + * - table_names + - Sequence + - A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. + + * - wait_after_last_change_seconds + - Integer + - If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. The minimum allowed value is 60 seconds. + + ### resources.jobs.webhook_notifications A collection of system notification IDs to notify when runs of this job begin or complete. + #### Attributes -| Key | Type | Description | -|------------------------------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `on_duration_warning_threshold_exceeded` | Sequence | An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. | -| `on_failure` | Sequence | An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. | -| `on_start` | Sequence | An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. | -| `on_streaming_backlog_exceeded` | Sequence | An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. | -| `on_success` | Sequence | An optional list of system notification IDs to call when the run completes successfully. 
A maximum of 3 destinations can be specified for the `on_success` property. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - on_duration_warning_threshold_exceeded + - Sequence + - An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. + + * - on_failure + - Sequence + - An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. + + * - on_start + - Sequence + - An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. + + * - on_streaming_backlog_exceeded + - Sequence + - An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. + + * - on_success + - Sequence + - An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. + + ### resources.model_serving_endpoints The model serving endpoint definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|-------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `ai_gateway` | Map | The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are supported as of now. | -| `config` | Map | The core config of the serving endpoint. | -| `name` | String | The name of the serving endpoint. This field is required and must be unique across a Databricks workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. | -| `permissions` | Sequence | - | -| `rate_limits` | Sequence | Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI Gateway to manage rate limits. | -| `route_optimized` | Boolean | Enable route optimization for the serving endpoint. | -| `tags` | Sequence | Tags to be attached to the serving endpoint and automatically propagated to billing logs. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - ai_gateway + - Map + - The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are supported as of now. + + * - config + - Map + - The core config of the serving endpoint. + + * - name + - String + - The name of the serving endpoint. This field is required and must be unique across a Databricks workspace. 
An endpoint name can consist of alphanumeric characters, dashes, and underscores. + + * - permissions + - Sequence + - + + * - rate_limits + - Sequence + - Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI Gateway to manage rate limits. + + * - route_optimized + - Boolean + - Enable route optimization for the serving endpoint. + + * - tags + - Sequence + - Tags to be attached to the serving endpoint and automatically propagated to billing logs. + + ### resources.model_serving_endpoints.ai_gateway The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are supported as of now. + #### Attributes -| Key | Type | Description | -|--------------------------|----------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `guardrails` | Map | Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. | -| `inference_table_config` | Map | Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. | -| `rate_limits` | Sequence | Configuration for rate limits which can be set to limit endpoint traffic. | -| `usage_tracking_config` | Map | Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - guardrails + - Map + - Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. + + * - inference_table_config + - Map + - Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. + + * - rate_limits + - Sequence + - Configuration for rate limits which can be set to limit endpoint traffic. + + * - usage_tracking_config + - Map + - Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. + + ### resources.model_serving_endpoints.ai_gateway.guardrails Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. + #### Attributes -| Key | Type | Description | -|----------|------|---------------------------------------------| -| `input` | Map | Configuration for input guardrail filters. | -| `output` | Map | Configuration for output guardrail filters. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - input + - Map + - Configuration for input guardrail filters. + + * - output + - Map + - Configuration for output guardrail filters. + + ### resources.model_serving_endpoints.ai_gateway.guardrails.input Configuration for input guardrail filters. + #### Attributes -| Key | Type | Description | -|--------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------| -| `invalid_keywords` | Sequence | List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. | -| `pii` | Map | Configuration for guardrail PII filter. 
| -| `safety` | Boolean | Indicates whether the safety filter is enabled. | -| `valid_topics` | Sequence | The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - invalid_keywords + - Sequence + - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. + + * - pii + - Map + - Configuration for guardrail PII filter. + + * - safety + - Boolean + - Indicates whether the safety filter is enabled. + + * - valid_topics + - Sequence + - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. + + ### resources.model_serving_endpoints.ai_gateway.guardrails.input.pii Configuration for guardrail PII filter. + #### Attributes -| Key | Type | Description | -|------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `behavior` | String | Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - behavior + - String + - Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. + + ### resources.model_serving_endpoints.ai_gateway.guardrails.output Configuration for output guardrail filters. + #### Attributes -| Key | Type | Description | -|--------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------| -| `invalid_keywords` | Sequence | List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. | -| `pii` | Map | Configuration for guardrail PII filter. | -| `safety` | Boolean | Indicates whether the safety filter is enabled. | -| `valid_topics` | Sequence | The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - invalid_keywords + - Sequence + - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. + + * - pii + - Map + - Configuration for guardrail PII filter. + + * - safety + - Boolean + - Indicates whether the safety filter is enabled. + + * - valid_topics + - Sequence + - The list of allowed topics. 
Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. + + ### resources.model_serving_endpoints.ai_gateway.guardrails.output.pii Configuration for guardrail PII filter. + #### Attributes -| Key | Type | Description | -|------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `behavior` | String | Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - behavior + - String + - Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. + + ### resources.model_serving_endpoints.ai_gateway.inference_table_config Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. + #### Attributes -| Key | Type | Description | -|---------------------|---------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `catalog_name` | String | The name of the catalog in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the catalog name. | -| `enabled` | Boolean | Indicates whether the inference table is enabled. | -| `schema_name` | String | The name of the schema in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the schema name. | -| `table_name_prefix` | String | The prefix of the table in Unity Catalog. NOTE: On update, you have to disable inference table first in order to change the prefix name. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - catalog_name + - String + - The name of the catalog in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the catalog name. + + * - enabled + - Boolean + - Indicates whether the inference table is enabled. + + * - schema_name + - String + - The name of the schema in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the schema name. + + * - table_name_prefix + - String + - The prefix of the table in Unity Catalog. NOTE: On update, you have to disable inference table first in order to change the prefix name. 
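+ To make the payload-logging options above concrete, the following is a minimal, illustrative sketch of how inference tables might be enabled on a serving endpoint in a bundle's `databricks.yml`. The resource key, endpoint name, catalog, and schema below are placeholders, not values taken from this schema, and the required `config.served_entities` block is omitted for brevity:
+
+ ```yaml
+ resources:
+   model_serving_endpoints:
+     my_endpoint:                        # hypothetical resource key
+       name: my-external-model-endpoint  # placeholder endpoint name
+       ai_gateway:
+         inference_table_config:
+           enabled: true
+           catalog_name: main            # placeholder catalog
+           schema_name: serving_logs     # placeholder schema
+           table_name_prefix: my_endpoint
+ ```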
+ + ### resources.model_serving_endpoints.ai_gateway.usage_tracking_config Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. + #### Attributes -| Key | Type | Description | -|-----------|---------|-----------------------------------| -| `enabled` | Boolean | Whether to enable usage tracking. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - enabled + - Boolean + - Whether to enable usage tracking. + + ### resources.model_serving_endpoints.config The core config of the serving endpoint. + #### Attributes -| Key | Type | Description | -|-----------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------| -| `auto_capture_config` | Map | Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. | -| `served_entities` | Sequence | A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served entities. | -| `served_models` | Sequence | (Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A serving endpoint can have up to 15 served models. | -| `traffic_config` | Map | The traffic config defining how invocations to the serving endpoint should be routed. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - auto_capture_config + - Map + - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. + + * - served_entities + - Sequence + - A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served entities. + + * - served_models + - Sequence + - (Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A serving endpoint can have up to 15 served models. + + * - traffic_config + - Map + - The traffic config defining how invocations to the serving endpoint should be routed. + + ### resources.model_serving_endpoints.config.auto_capture_config Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. + #### Attributes -| Key | Type | Description | -|---------------------|---------|------------------------------------------------------------------------------------------------------------------------------------------| -| `catalog_name` | String | The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled. | -| `enabled` | Boolean | Indicates whether the inference table is enabled. | -| `schema_name` | String | The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if the inference table is already enabled. | -| `table_name_prefix` | String | The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - catalog_name + - String + - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled. + + * - enabled + - Boolean + - Indicates whether the inference table is enabled. + + * - schema_name + - String + - The name of the schema in Unity Catalog. 
NOTE: On update, you cannot change the schema name if the inference table is already enabled. + + * - table_name_prefix + - String + - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled. + + ### resources.model_serving_endpoints.config.traffic_config The traffic config defining how invocations to the serving endpoint should be routed. + #### Attributes -| Key | Type | Description | -|----------|----------|---------------------------------------------------------------| -| `routes` | Sequence | The list of routes that define traffic to each served entity. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - routes + - Sequence + - The list of routes that define traffic to each served entity. + + ### resources.models The model definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|--------------------------|----------|-------------------------------------------------------------------------------------------------------| -| `creation_timestamp` | Integer | Timestamp recorded when this `registered_model` was created. | -| `description` | String | Description of this `registered_model`. | -| `last_updated_timestamp` | Integer | Timestamp recorded when metadata for this `registered_model` was last updated. | -| `latest_versions` | Sequence | Collection of latest model versions for each stage. Only contains models with current `READY` status. | -| `name` | String | Unique name for the model. | -| `permissions` | Sequence | - | -| `tags` | Sequence | Tags: Additional metadata key-value pairs for this `registered_model`. | -| `user_id` | String | User that created this `registered_model` | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - creation_timestamp + - Integer + - Timestamp recorded when this `registered_model` was created. + + * - description + - String + - Description of this `registered_model`. + + * - last_updated_timestamp + - Integer + - Timestamp recorded when metadata for this `registered_model` was last updated. + + * - latest_versions + - Sequence + - Collection of latest model versions for each stage. Only contains models with current `READY` status. + + * - name + - String + - Unique name for the model. + + * - permissions + - Sequence + - + + * - tags + - Sequence + - Tags: Additional metadata key-value pairs for this `registered_model`. + + * - user_id + - String + - User that created this `registered_model` + + ### resources.pipelines The pipeline definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `budget_policy_id` | String | Budget policy of this pipeline. | -| `catalog` | String | A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. | -| `channel` | String | DLT Release Channel that specifies which version to use. | -| `clusters` | Sequence | Cluster settings for this pipeline deployment. | -| `configuration` | Map | String-String configuration for this pipeline execution. | -| `continuous` | Boolean | Whether the pipeline is continuous or triggered. This replaces `trigger`. | -| `deployment` | Map | Deployment type of this pipeline. | -| `development` | Boolean | Whether the pipeline is in Development mode. Defaults to false. | -| `edition` | String | Pipeline product edition. | -| `filters` | Map | Filters on which Pipeline packages to include in the deployed graph. | -| `gateway_definition` | Map | The definition of a gateway pipeline to support change data capture. | -| `id` | String | Unique identifier for this pipeline. | -| `ingestion_definition` | Map | The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings. | -| `libraries` | Sequence | Libraries or code needed by this deployment. | -| `name` | String | Friendly identifier for this pipeline. | -| `notifications` | Sequence | List of notification settings for this pipeline. | -| `permissions` | Sequence | - | -| `photon` | Boolean | Whether Photon is enabled for this pipeline. | -| `restart_window` | Map | Restart window of this pipeline. | -| `schema` | String | The default schema (database) where tables are read from or published to. The presence of this field implies that the pipeline is in direct publishing mode. | -| `serverless` | Boolean | Whether serverless compute is enabled for this pipeline. | -| `storage` | String | DBFS root directory for storing checkpoints and tables. | -| `target` | String | Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. | -| `trigger` | Map | Which pipeline trigger to use. Deprecated: Use `continuous` instead. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - budget_policy_id + - String + - Budget policy of this pipeline. + + * - catalog + - String + - A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. + + * - channel + - String + - DLT Release Channel that specifies which version to use. + + * - clusters + - Sequence + - Cluster settings for this pipeline deployment. 
+ + * - configuration + - Map + - String-String configuration for this pipeline execution. + + * - continuous + - Boolean + - Whether the pipeline is continuous or triggered. This replaces `trigger`. + + * - deployment + - Map + - Deployment type of this pipeline. + + * - development + - Boolean + - Whether the pipeline is in Development mode. Defaults to false. + + * - edition + - String + - Pipeline product edition. + + * - filters + - Map + - Filters on which Pipeline packages to include in the deployed graph. + + * - gateway_definition + - Map + - The definition of a gateway pipeline to support change data capture. + + * - id + - String + - Unique identifier for this pipeline. + + * - ingestion_definition + - Map + - The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings. + + * - libraries + - Sequence + - Libraries or code needed by this deployment. + + * - name + - String + - Friendly identifier for this pipeline. + + * - notifications + - Sequence + - List of notification settings for this pipeline. + + * - permissions + - Sequence + - + + * - photon + - Boolean + - Whether Photon is enabled for this pipeline. + + * - restart_window + - Map + - Restart window of this pipeline. + + * - schema + - String + - The default schema (database) where tables are read from or published to. The presence of this field implies that the pipeline is in direct publishing mode. + + * - serverless + - Boolean + - Whether serverless compute is enabled for this pipeline. + + * - storage + - String + - DBFS root directory for storing checkpoints and tables. + + * - target + - String + - Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. + + * - trigger + - Map + - Which pipeline trigger to use. Deprecated: Use `continuous` instead. + + ### resources.pipelines.configuration String-String configuration for this pipeline execution. + + ### resources.pipelines.deployment Deployment type of this pipeline. + #### Attributes -| Key | Type | Description | -|----------------------|--------|----------------------------------------------------------------| -| `kind` | String | The deployment method that manages the pipeline. | -| `metadata_file_path` | String | The path to the file containing metadata about the deployment. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - kind + - String + - The deployment method that manages the pipeline. + + * - metadata_file_path + - String + - The path to the file containing metadata about the deployment. + + ### resources.pipelines.filters Filters on which Pipeline packages to include in the deployed graph. + #### Attributes -| Key | Type | Description | -|-----------|----------|-------------------| -| `exclude` | Sequence | Paths to exclude. | -| `include` | Sequence | Paths to include. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - exclude + - Sequence + - Paths to exclude. + + * - include + - Sequence + - Paths to include. + + ### resources.pipelines.gateway_definition The definition of a gateway pipeline to support change data capture. 
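+ As an illustrative sketch only, a change data capture gateway pipeline might be declared in a bundle as follows; the resource key, connection name, and storage locations are placeholder values chosen for the example, matching the attributes documented below:
+
+ ```yaml
+ resources:
+   pipelines:
+     cdc_gateway:                                   # hypothetical resource key
+       name: sqlserver-cdc-gateway                  # placeholder pipeline name
+       gateway_definition:
+         connection_name: my_sqlserver_connection   # placeholder Unity Catalog connection
+         gateway_storage_catalog: main              # placeholder catalog
+         gateway_storage_schema: cdc_staging        # placeholder schema
+         gateway_storage_name: cdc_gateway_storage
+ ```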
+ #### Attributes -| Key | Type | Description | -|---------------------------|--------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `connection_id` | String | [Deprecated, use connection_name instead] Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. | -| `connection_name` | String | Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. | -| `gateway_storage_catalog` | String | Required, Immutable. The name of the catalog for the gateway pipeline's storage location. | -| `gateway_storage_name` | String | Optional. The Unity Catalog-compatible name for the gateway storage location. This is the destination to use for the data that is extracted by the gateway. Delta Live Tables system will automatically create the storage location under the catalog and schema. | -| `gateway_storage_schema` | String | Required, Immutable. The name of the schema for the gateway pipelines's storage location. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - connection_id + - String + - [Deprecated, use connection_name instead] Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. + + * - connection_name + - String + - Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. + + * - gateway_storage_catalog + - String + - Required, Immutable. The name of the catalog for the gateway pipeline's storage location. + + * - gateway_storage_name + - String + - Optional. The Unity Catalog-compatible name for the gateway storage location. This is the destination to use for the data that is extracted by the gateway. Delta Live Tables system will automatically create the storage location under the catalog and schema. + + * - gateway_storage_schema + - String + - Required, Immutable. The name of the schema for the gateway pipelines's storage location. + + ### resources.pipelines.ingestion_definition The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings. + #### Attributes -| Key | Type | Description | -|------------------------|----------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `connection_name` | String | Immutable. The Unity Catalog connection that this ingestion pipeline uses to communicate with the source. This is used with connectors for applications like Salesforce, Workday, and so on. | -| `ingestion_gateway_id` | String | Immutable. Identifier for the gateway that is used by this ingestion pipeline to communicate with the source database. This is used with connectors to databases like SQL Server. | -| `objects` | Sequence | Required. Settings specifying tables to replicate and the destination for the replicated tables. | -| `table_configuration` | Map | Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - connection_name + - String + - Immutable. 
The Unity Catalog connection that this ingestion pipeline uses to communicate with the source. This is used with connectors for applications like Salesforce, Workday, and so on. + + * - ingestion_gateway_id + - String + - Immutable. Identifier for the gateway that is used by this ingestion pipeline to communicate with the source database. This is used with connectors to databases like SQL Server. + + * - objects + - Sequence + - Required. Settings specifying tables to replicate and the destination for the replicated tables. + + * - table_configuration + - Map + - Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. + + ### resources.pipelines.ingestion_definition.table_configuration Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. + #### Attributes -| Key | Type | Description | -|-------------------------------------|----------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `primary_keys` | Sequence | The primary key of the table used to apply changes. | -| `salesforce_include_formula_fields` | Boolean | If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector | -| `scd_type` | String | The SCD type to use to ingest the table. | -| `sequence_by` | Sequence | The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - primary_keys + - Sequence + - The primary key of the table used to apply changes. + + * - salesforce_include_formula_fields + - Boolean + - If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector + + * - scd_type + - String + - The SCD type to use to ingest the table. + + * - sequence_by + - Sequence + - The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. + + ### resources.pipelines.restart_window Restart window of this pipeline. + #### Attributes -| Key | Type | Description | -|----------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `days_of_week` | String | Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). If not specified all days of the week will be used. | -| `start_hour` | Integer | An integer between 0 and 23 denoting the start hour for the restart window in the 24-hour day. Continuous pipeline restart is triggered only within a five-hour window starting at this hour. | -| `time_zone_id` | String | Time zone id of restart window. See https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details. If not specified, UTC will be used. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - days_of_week + - String + - Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). If not specified all days of the week will be used. 
+ + * - start_hour + - Integer + - An integer between 0 and 23 denoting the start hour for the restart window in the 24-hour day. Continuous pipeline restart is triggered only within a five-hour window starting at this hour. + + * - time_zone_id + - String + - Time zone id of restart window. See https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details. If not specified, UTC will be used. + + ### resources.pipelines.trigger Which pipeline trigger to use. Deprecated: Use `continuous` instead. + #### Attributes -| Key | Type | Description | -|----------|------|-------------| -| `cron` | Map | - | -| `manual` | Map | - | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - cron + - Map + - + + * - manual + - Map + - + + ### resources.pipelines.trigger.cron + #### Attributes -| Key | Type | Description | -|------------------------|--------|-------------| -| `quartz_cron_schedule` | String | - | -| `timezone_id` | String | - | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - quartz_cron_schedule + - String + - + + * - timezone_id + - String + - + + ### resources.pipelines.trigger.manual + + ### resources.quality_monitors The quality monitor definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `assets_dir` | String | The directory to store monitoring assets (e.g. dashboard, metric tables). | -| `baseline_table_name` | String | Name of the baseline table from which drift metrics are computed from. Columns in the monitored table should also be present in the baseline table. | -| `custom_metrics` | Sequence | Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). | -| `data_classification_config` | Map | The data classification config for the monitor. | -| `inference_log` | Map | Configuration for monitoring inference logs. | -| `notifications` | Map | The notification settings for the monitor. | -| `output_schema_name` | String | Schema where output metric tables are created. | -| `schedule` | Map | The schedule for automatically updating and refreshing metric tables. | -| `skip_builtin_dashboard` | Boolean | Whether to skip creating a default dashboard summarizing data quality metrics. | -| `slicing_exprs` | Sequence | List of column expressions to slice data with for targeted analysis. The data is grouped by each expression independently, resulting in a separate slice for each predicate and its complements. For high-cardinality columns, only the top 100 unique values by frequency will generate slices. | -| `snapshot` | Map | Configuration for monitoring snapshot tables. | -| `table_name` | String | - | -| `time_series` | Map | Configuration for monitoring time series tables. 
| -| `warehouse_id` | String | Optional argument to specify the warehouse for dashboard creation. If not specified, the first running warehouse will be used. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - assets_dir + - String + - The directory to store monitoring assets (e.g. dashboard, metric tables). + + * - baseline_table_name + - String + - Name of the baseline table from which drift metrics are computed from. Columns in the monitored table should also be present in the baseline table. + + * - custom_metrics + - Sequence + - Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). + + * - data_classification_config + - Map + - The data classification config for the monitor. + + * - inference_log + - Map + - Configuration for monitoring inference logs. + + * - notifications + - Map + - The notification settings for the monitor. + + * - output_schema_name + - String + - Schema where output metric tables are created. + + * - schedule + - Map + - The schedule for automatically updating and refreshing metric tables. + + * - skip_builtin_dashboard + - Boolean + - Whether to skip creating a default dashboard summarizing data quality metrics. + + * - slicing_exprs + - Sequence + - List of column expressions to slice data with for targeted analysis. The data is grouped by each expression independently, resulting in a separate slice for each predicate and its complements. For high-cardinality columns, only the top 100 unique values by frequency will generate slices. + + * - snapshot + - Map + - Configuration for monitoring snapshot tables. + + * - table_name + - String + - + + * - time_series + - Map + - Configuration for monitoring time series tables. + + * - warehouse_id + - String + - Optional argument to specify the warehouse for dashboard creation. If not specified, the first running warehouse will be used. + + ### resources.quality_monitors.data_classification_config The data classification config for the monitor. + #### Attributes -| Key | Type | Description | -|-----------|---------|-----------------------------------------| -| `enabled` | Boolean | Whether data classification is enabled. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - enabled + - Boolean + - Whether data classification is enabled. + + ### resources.quality_monitors.inference_log Configuration for monitoring inference logs. + #### Attributes -| Key | Type | Description | -|------------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `granularities` | Sequence | Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. | -| `label_col` | String | Optional column that contains the ground truth for the prediction. | -| `model_id_col` | String | Column that contains the id of the model generating the predictions. Metrics will be computed per model id by default, and also across all model ids. 
| -| `prediction_col` | String | Column that contains the output/prediction from the model. | -| `prediction_proba_col` | String | Optional column that contains the prediction probabilities for each class in a classification problem type. The values in this column should be a map, mapping each class label to the prediction probability for a given sample. The map should be of PySpark MapType(). | -| `problem_type` | String | Problem type the model aims to solve. Determines the type of model-quality metrics that will be computed. | -| `timestamp_col` | String | Column that contains the timestamps of requests. The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - granularities + - Sequence + - Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. + + * - label_col + - String + - Optional column that contains the ground truth for the prediction. + + * - model_id_col + - String + - Column that contains the id of the model generating the predictions. Metrics will be computed per model id by default, and also across all model ids. + + * - prediction_col + - String + - Column that contains the output/prediction from the model. + + * - prediction_proba_col + - String + - Optional column that contains the prediction probabilities for each class in a classification problem type. The values in this column should be a map, mapping each class label to the prediction probability for a given sample. The map should be of PySpark MapType(). + + * - problem_type + - String + - Problem type the model aims to solve. Determines the type of model-quality metrics that will be computed. + + * - timestamp_col + - String + - Column that contains the timestamps of requests. The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). + + ### resources.quality_monitors.notifications The notification settings for the monitor. + #### Attributes -| Key | Type | Description | -|--------------------------------------|------|------------------------------------------------------------------------------| -| `on_failure` | Map | Who to send notifications to on monitor failure. | -| `on_new_classification_tag_detected` | Map | Who to send notifications to when new data classification tags are detected. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - on_failure + - Map + - Who to send notifications to on monitor failure. + + * - on_new_classification_tag_detected + - Map + - Who to send notifications to when new data classification tags are detected. + + ### resources.quality_monitors.notifications.on_failure Who to send notifications to on monitor failure. 
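+ For illustration, failure notifications for a quality monitor might be configured in a bundle as sketched below; the monitor key, table, schema, and email address are placeholders rather than values defined by this schema:
+
+ ```yaml
+ resources:
+   quality_monitors:
+     my_monitor:                              # hypothetical resource key
+       table_name: main.my_schema.my_table    # placeholder monitored table
+       output_schema_name: main.monitoring    # placeholder output schema
+       notifications:
+         on_failure:
+           email_addresses:
+             - ops@example.com                # a maximum of 5 addresses is supported
+ ```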
+ #### Attributes -| Key | Type | Description | -|-------------------|----------|-------------------------------------------------------------------------------------------------------| -| `email_addresses` | Sequence | The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - email_addresses + - Sequence + - The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. + + ### resources.quality_monitors.notifications.on_new_classification_tag_detected Who to send notifications to when new data classification tags are detected. + #### Attributes -| Key | Type | Description | -|-------------------|----------|-------------------------------------------------------------------------------------------------------| -| `email_addresses` | Sequence | The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - email_addresses + - Sequence + - The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. + + ### resources.quality_monitors.schedule The schedule for automatically updating and refreshing metric tables. + #### Attributes -| Key | Type | Description | -|--------------------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `pause_status` | String | Read only field that indicates whether a schedule is paused or not. | -| `quartz_cron_expression` | String | The expression that determines when to run the monitor. See [examples](https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html). | -| `timezone_id` | String | The timezone id (e.g., ``"PST"``) in which to evaluate the quartz expression. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - pause_status + - String + - Read only field that indicates whether a schedule is paused or not. + + * - quartz_cron_expression + - String + - The expression that determines when to run the monitor. See [examples](https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html). + + * - timezone_id + - String + - The timezone id (e.g., ``"PST"``) in which to evaluate the quartz expression. + + ### resources.quality_monitors.snapshot Configuration for monitoring snapshot tables. + + ### resources.quality_monitors.time_series Configuration for monitoring time series tables. + #### Attributes -| Key | Type | Description | -|-----------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `granularities` | Sequence | Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. | -| `timestamp_col` | String | Column that contains the timestamps of requests. 
The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - granularities + - Sequence + - Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. + + * - timestamp_col + - String + - Column that contains the timestamps of requests. The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). + + ### resources.registered_models The registered model definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|--------------------|----------|-----------------------------------------------------------------------------------| -| `catalog_name` | String | The name of the catalog where the schema and the registered model reside | -| `comment` | String | The comment attached to the registered model | -| `grants` | Sequence | - | -| `name` | String | The name of the registered model | -| `schema_name` | String | The name of the schema where the registered model resides | -| `storage_location` | String | The storage location on the cloud under which model version data files are stored | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - catalog_name + - String + - The name of the catalog where the schema and the registered model reside + + * - comment + - String + - The comment attached to the registered model + + * - grants + - Sequence + - + + * - name + - String + - The name of the registered model + + * - schema_name + - String + - The name of the schema where the registered model resides + + * - storage_location + - String + - The storage location on the cloud under which model version data files are stored + + ### resources.schemas The schema definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|----------------|----------|----------------------------------------------------| -| `catalog_name` | String | Name of parent catalog. | -| `comment` | String | User-provided free-form text description. | -| `grants` | Sequence | - | -| `name` | String | Name of schema, relative to parent catalog. | -| `properties` | Map | - | -| `storage_root` | String | Storage root URL for managed tables within schema. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - catalog_name + - String + - Name of parent catalog. 
+ + * - comment + - String + - User-provided free-form text description. + + * - grants + - Sequence + - + + * - name + - String + - Name of schema, relative to parent catalog. + + * - properties + - Map + - + + * - storage_root + - String + - Storage root URL for managed tables within schema. + + ### resources.schemas.properties + + ### resources.volumes -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|--------------------|----------|-------------------------------------------------------------| -| `catalog_name` | String | The name of the catalog where the schema and the volume are | -| `comment` | String | The comment attached to the volume | -| `grants` | Sequence | - | -| `name` | String | The name of the volume | -| `schema_name` | String | The name of the schema where the volume is | -| `storage_location` | String | The storage location on the cloud | -| `volume_type` | String | - | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - catalog_name + - String + - The name of the catalog where the schema and the volume are + + * - comment + - String + - The comment attached to the volume + + * - grants + - Sequence + - + + * - name + - String + - The name of the volume + + * - schema_name + - String + - The name of the schema where the volume is + + * - storage_location + - String + - The storage location on the cloud + + * - volume_type + - String + - + + ## run_as The identity to use to run the bundle. + #### Attributes -| Key | Type | Description | -|--------------------------|--------|--------------------------------------------------------------------------------------------------------------| -| `service_principal_name` | String | Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. | -| `user_name` | String | The email of an active workspace user. Non-admin users can only set this field to their own email. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - service_principal_name + - String + - Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. + + * - user_name + - String + - The email of an active workspace user. Non-admin users can only set this field to their own email. + + ## sync The files and file paths to include or exclude in the bundle. See [_](/dev-tools/bundles/) + #### Attributes -| Key | Type | Description | -|-----------|----------|----------------------------------------------------------------------------------------------------------------------------| -| `exclude` | Sequence | A list of files or folders to exclude from the bundle. | -| `include` | Sequence | A list of files or folders to include in the bundle. | -| `paths` | Sequence | The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - exclude + - Sequence + - A list of files or folders to exclude from the bundle. + + * - include + - Sequence + - A list of files or folders to include in the bundle. 
+ + * - paths + - Sequence + - The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed. + + ## targets Defines deployment targets for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|---------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------| -| `artifacts` | Map | The artifacts to include in the target deployment. See [_](#artifact) | -| `bundle` | Map | The name of the bundle when deploying to this target. | -| `cluster_id` | String | The ID of the cluster to use for this target. | -| `compute_id` | String | Deprecated. The ID of the compute to use for this target. | -| `default` | Boolean | Whether this target is the default target. | -| `git` | Map | The Git version control settings for the target. See [_](#git). | -| `mode` | String | The deployment mode for the target. Valid values are `development` or `production`. See [_](/dev-tools/bundles/deployment-modes.md). | -| `permissions` | Sequence | The permissions for deploying and running the bundle in the target. See [_](#permission). | -| `presets` | Map | The deployment presets for the target. See [_](#preset). | -| `resources` | Map | The resource definitions for the target. See [_](#resources). | -| `run_as` | Map | The identity to use to run the bundle. See [_](#job_run_as) and [_](/dev-tools/bundles/run_as.md). | -| `sync` | Map | The local paths to sync to the target workspace when a bundle is run or deployed. See [_](#sync). | -| `variables` | Map | The custom variable definitions for the target. See [_](/dev-tools/bundles/settings.md#variables) and [_](/dev-tools/bundles/variables.md). | -| `workspace` | Map | The Databricks workspace for the target. [_](#workspace) | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - artifacts + - Map + - The artifacts to include in the target deployment. See [_](#artifact) + + * - bundle + - Map + - The name of the bundle when deploying to this target. + + * - cluster_id + - String + - The ID of the cluster to use for this target. + + * - compute_id + - String + - Deprecated. The ID of the compute to use for this target. + + * - default + - Boolean + - Whether this target is the default target. + + * - git + - Map + - The Git version control settings for the target. See [_](#git). + + * - mode + - String + - The deployment mode for the target. Valid values are `development` or `production`. See [_](/dev-tools/bundles/deployment-modes.md). + + * - permissions + - Sequence + - The permissions for deploying and running the bundle in the target. See [_](#permission). + + * - presets + - Map + - The deployment presets for the target. See [_](#preset). + + * - resources + - Map + - The resource definitions for the target. See [_](#resources). + + * - run_as + - Map + - The identity to use to run the bundle. See [_](#job_run_as) and [_](/dev-tools/bundles/run_as.md). + + * - sync + - Map + - The local paths to sync to the target workspace when a bundle is run or deployed. See [_](#sync). + + * - variables + - Map + - The custom variable definitions for the target. 
See [_](/dev-tools/bundles/settings.md#variables) and [_](/dev-tools/bundles/variables.md). + + * - workspace + - Map + - The Databricks workspace for the target. [_](#workspace) + + ### targets.artifacts The artifacts to include in the target deployment. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|--------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `build` | String | An optional set of non-default build commands that you want to run locally before deployment. For Python wheel builds, the Databricks CLI assumes that it can find a local install of the Python wheel package to run builds, and it runs the command python setup.py bdist_wheel by default during each bundle deployment. To specify multiple build commands, separate each command with double-ampersand (&&) characters. | -| `executable` | String | The executable type. | -| `files` | Sequence | The source files for the artifact, defined as an [_](#artifact_file). | -| `path` | String | The location where the built artifact will be saved. | -| `type` | String | The type of the artifact. Valid values are `wheel` or `jar` | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - build + - String + - An optional set of non-default build commands that you want to run locally before deployment. For Python wheel builds, the Databricks CLI assumes that it can find a local install of the Python wheel package to run builds, and it runs the command python setup.py bdist_wheel by default during each bundle deployment. To specify multiple build commands, separate each command with double-ampersand (&&) characters. + + * - executable + - String + - The executable type. + + * - files + - Sequence + - The source files for the artifact, defined as an [_](#artifact_file). + + * - path + - String + - The location where the built artifact will be saved. + + * - type + - String + - The type of the artifact. Valid values are `wheel` or `jar` + + ### targets.bundle The name of the bundle when deploying to this target. + #### Attributes -| Key | Type | Description | -|--------------------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `cluster_id` | String | The ID of a cluster to use to run the bundle. See [_](/dev-tools/bundles/settings.md#cluster_id). | -| `compute_id` | String | - | -| `databricks_cli_version` | String | The Databricks CLI version to use for the bundle. See [_](/dev-tools/bundles/settings.md#databricks_cli_version). | -| `deployment` | Map | The definition of the bundle deployment. For supported attributes, see [_](#deployment) and [_](/dev-tools/bundles/deployment-modes.md). | -| `git` | Map | The Git version control details that are associated with your bundle. 
For supported attributes, see [_](#git) and [_](/dev-tools/bundles/settings.md#git). | -| `name` | String | The name of the bundle. | -| `uuid` | String | - | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - cluster_id + - String + - The ID of a cluster to use to run the bundle. See [_](/dev-tools/bundles/settings.md#cluster_id). + + * - compute_id + - String + - + + * - databricks_cli_version + - String + - The Databricks CLI version to use for the bundle. See [_](/dev-tools/bundles/settings.md#databricks_cli_version). + + * - deployment + - Map + - The definition of the bundle deployment. For supported attributes, see [_](#deployment) and [_](/dev-tools/bundles/deployment-modes.md). + + * - git + - Map + - The Git version control details that are associated with your bundle. For supported attributes, see [_](#git) and [_](/dev-tools/bundles/settings.md#git). + + * - name + - String + - The name of the bundle. + + * - uuid + - String + - + + ### targets.bundle.deployment The definition of the bundle deployment + #### Attributes -| Key | Type | Description | -|-----------------------|---------|---------------------------------------------------------------------------------------------------------| -| `fail_on_active_runs` | Boolean | Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. | -| `lock` | Map | The deployment lock attributes. See [_](#lock). | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - fail_on_active_runs + - Boolean + - Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. + + * - lock + - Map + - The deployment lock attributes. See [_](#lock). + + ### targets.bundle.deployment.lock The deployment lock attributes. + #### Attributes -| Key | Type | Description | -|-----------|---------|----------------------------------------------| -| `enabled` | Boolean | Whether this lock is enabled. | -| `force` | Boolean | Whether to force this lock if it is enabled. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - enabled + - Boolean + - Whether this lock is enabled. + + * - force + - Boolean + - Whether to force this lock if it is enabled. + + ### targets.bundle.git The Git version control details that are associated with your bundle. + #### Attributes -| Key | Type | Description | -|--------------|--------|--------------------------------------------------------------------------------| -| `branch` | String | The Git branch name. See [_](/dev-tools/bundles/settings.md#git). | -| `origin_url` | String | The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git). | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - branch + - String + - The Git branch name. See [_](/dev-tools/bundles/settings.md#git). + + * - origin_url + - String + - The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git). + + ### targets.git The Git version control settings for the target. + #### Attributes -| Key | Type | Description | -|--------------|--------|--------------------------------------------------------------------------------| -| `branch` | String | The Git branch name. See [_](/dev-tools/bundles/settings.md#git). | -| `origin_url` | String | The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git). | - + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - branch + - String + - The Git branch name. See [_](/dev-tools/bundles/settings.md#git). + + * - origin_url + - String + - The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git). + + ### targets.presets The deployment presets for the target. + #### Attributes -| Key | Type | Description | -|----------------------------|---------|-------------------------------------------------------------------------------------------------| -| `jobs_max_concurrent_runs` | Integer | The maximum concurrent runs for a job. | -| `name_prefix` | String | The prefix for job runs of the bundle. | -| `pipelines_development` | Boolean | Whether pipeline deployments should be locked in development mode. | -| `source_linked_deployment` | Boolean | Whether to link the deployment to the bundle source. | -| `tags` | Map | The tags for the bundle deployment. | -| `trigger_pause_status` | String | A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - jobs_max_concurrent_runs + - Integer + - The maximum concurrent runs for a job. + + * - name_prefix + - String + - The prefix for job runs of the bundle. + + * - pipelines_development + - Boolean + - Whether pipeline deployments should be locked in development mode. + + * - source_linked_deployment + - Boolean + - Whether to link the deployment to the bundle source. + + * - tags + - Map + - The tags for the bundle deployment. + + * - trigger_pause_status + - String + - A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED. + + ### targets.presets.tags The tags for the bundle deployment. + + ### targets.resources The resource definitions for the target. + #### Attributes -| Key | Type | Description | -|---------------------------|------|------------------------------------------------------------------------------------------------------------------------| -| `clusters` | Map | The cluster definitions for the bundle. See [_](/dev-tools/bundles/resources.md#cluster) | -| `dashboards` | Map | The dashboard definitions for the bundle. See [_](/dev-tools/bundles/resources.md#dashboard) | -| `experiments` | Map | The experiment definitions for the bundle. See [_](/dev-tools/bundles/resources.md#experiment) | -| `jobs` | Map | The job definitions for the bundle. See [_](/dev-tools/bundles/resources.md#job) | -| `model_serving_endpoints` | Map | The model serving endpoint definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model_serving_endpoint) | -| `models` | Map | The model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model) | -| `pipelines` | Map | The pipeline definitions for the bundle. See [_](/dev-tools/bundles/resources.md#pipeline) | -| `quality_monitors` | Map | The quality monitor definitions for the bundle. See [_](/dev-tools/bundles/resources.md#quality_monitor) | -| `registered_models` | Map | The registered model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#registered_model) | -| `schemas` | Map | The schema definitions for the bundle. See [_](/dev-tools/bundles/resources.md#schema) | -| `volumes` | Map | - | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - clusters + - Map + - The cluster definitions for the bundle. 
See [_](/dev-tools/bundles/resources.md#cluster) + + * - dashboards + - Map + - The dashboard definitions for the bundle. See [_](/dev-tools/bundles/resources.md#dashboard) + + * - experiments + - Map + - The experiment definitions for the bundle. See [_](/dev-tools/bundles/resources.md#experiment) + + * - jobs + - Map + - The job definitions for the bundle. See [_](/dev-tools/bundles/resources.md#job) + + * - model_serving_endpoints + - Map + - The model serving endpoint definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model_serving_endpoint) + + * - models + - Map + - The model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model) + + * - pipelines + - Map + - The pipeline definitions for the bundle. See [_](/dev-tools/bundles/resources.md#pipeline) + + * - quality_monitors + - Map + - The quality monitor definitions for the bundle. See [_](/dev-tools/bundles/resources.md#quality_monitor) + + * - registered_models + - Map + - The registered model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#registered_model) + + * - schemas + - Map + - The schema definitions for the bundle. See [_](/dev-tools/bundles/resources.md#schema) + + * - volumes + - Map + - + + ### targets.resources.clusters The cluster definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|--------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `apply_policy_default_values` | Boolean | When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. | -| `autoscale` | Map | Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. | -| `autotermination_minutes` | Integer | Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. | -| `aws_attributes` | Map | Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. | -| `azure_attributes` | Map | Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. | -| `cluster_log_conf` | Map | The configuration for delivering spark logs to a long-term storage destination. 
Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. | -| `cluster_name` | String | Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. | -| `custom_tags` | Map | Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags | -| `data_security_mode` | String | - | -| `docker_image` | Map | - | -| `driver_instance_pool_id` | String | The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. | -| `driver_node_type_id` | String | The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. | -| `enable_elastic_disk` | Boolean | Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to the User Guide for more details. | -| `enable_local_disk_encryption` | Boolean | Whether to enable LUKS on cluster VMs' local disks | -| `gcp_attributes` | Map | Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. | -| `init_scripts` | Sequence | The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. | -| `instance_pool_id` | String | The optional ID of the instance pool to which the cluster belongs. | -| `node_type_id` | String | This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. | -| `num_workers` | Integer | Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. | -| `permissions` | Sequence | - | -| `policy_id` | String | The ID of the cluster policy used to create the cluster if applicable. 
| -| `runtime_engine` | String | - | -| `single_user_name` | String | Single user name if data_security_mode is `SINGLE_USER` | -| `spark_conf` | Map | An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. | -| `spark_env_vars` | Map | An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` | -| `spark_version` | String | The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. | -| `ssh_public_keys` | Sequence | SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. | -| `workload_type` | Map | - | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - apply_policy_default_values + - Boolean + - When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. + + * - autoscale + - Map + - Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. + + * - autotermination_minutes + - Integer + - Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. + + * - aws_attributes + - Map + - Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. + + * - azure_attributes + - Map + - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. + + * - cluster_log_conf + - Map + - The configuration for delivering spark logs to a long-term storage destination. Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. + + * - cluster_name + - String + - Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. + + * - custom_tags + - Map + - Additional tags for cluster resources. 
Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + + * - data_security_mode + - String + - + + * - docker_image + - Map + - + + * - driver_instance_pool_id + - String + - The optional ID of the instance pool to which the driver of the cluster belongs. The cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. + + * - driver_node_type_id + - String + - The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. + + * - enable_elastic_disk + - Boolean + - Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to the User Guide for more details. + + * - enable_local_disk_encryption + - Boolean + - Whether to enable LUKS on cluster VMs' local disks + + * - gcp_attributes + - Map + - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. + + * - init_scripts + - Sequence + - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. + + * - instance_pool_id + - String + - The optional ID of the instance pool to which the cluster belongs. + + * - node_type_id + - String + - This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. + + * - num_workers + - Integer + - Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. + + * - permissions + - Sequence + - + + * - policy_id + - String + - The ID of the cluster policy used to create the cluster if applicable. + + * - runtime_engine + - String + - + + * - single_user_name + - String + - Single user name if data_security_mode is `SINGLE_USER` + + * - spark_conf + - Map + - An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. + + * - spark_env_vars + - Map + - An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. 
In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` + + * - spark_version + - String + - The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. + + * - ssh_public_keys + - Sequence + - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. + + * - workload_type + - Map + - + + ### targets.resources.clusters.autoscale Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. + #### Attributes -| Key | Type | Description | -|---------------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `max_workers` | Integer | The maximum number of workers to which the cluster can scale up when overloaded. Note that `max_workers` must be strictly greater than `min_workers`. | -| `min_workers` | Integer | The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - max_workers + - Integer + - The maximum number of workers to which the cluster can scale up when overloaded. Note that `max_workers` must be strictly greater than `min_workers`. + + * - min_workers + - Integer + - The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. + + ### targets.resources.clusters.aws_attributes Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. + #### Attributes -| Key | Type | Description | -|--------------------------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `availability` | String | - | -| `ebs_volume_count` | Integer | The number of volumes launched for each instance. Users can choose up to 10 volumes. 
This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogenously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. | -| `ebs_volume_iops` | Integer | If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. | -| `ebs_volume_size` | Integer | The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. | -| `ebs_volume_throughput` | Integer | If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. | -| `ebs_volume_type` | String | - | -| `first_on_demand` | Integer | The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. | -| `instance_profile_arn` | String | Nodes for this cluster will only be placed on AWS instances with this instance profile. If ommitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans. If this field is ommitted, we will pull in the default from the conf if it exists. | -| `spot_bid_price_percent` | Integer | The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. The default value and documentation here should be kept consistent with CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. | -| `zone_id` | String | Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". 
The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - availability + - String + - + + * - ebs_volume_count + - Integer + - The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogeneously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. + + * - ebs_volume_iops + - Integer + - If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. + + * - ebs_volume_size + - Integer + - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. + + * - ebs_volume_throughput + - Integer + - If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. + + * - ebs_volume_type + - String + - + + * - first_on_demand + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + + * - instance_profile_arn + - String + - Nodes for this cluster will only be placed on AWS instances with this instance profile. If omitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans. If this field is omitted, we will pull in the default from the conf if it exists. 
+ + * - spot_bid_price_percent + - Integer + - The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. The default value and documentation here should be kept consistent with CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. + + * - zone_id + - String + - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. + + ### targets.resources.clusters.azure_attributes Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. + #### Attributes -| Key | Type | Description | -|----------------------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `availability` | String | - | -| `first_on_demand` | Integer | The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. | -| `log_analytics_info` | Map | Defines values necessary to configure and run Azure Log Analytics agent | -| `spot_bid_max_price` | Any | The max bid price to be used for Azure spot instances. The Max price for the bid cannot be higher than the on-demand price of the instance. 
If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should > 0 or -1. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - availability + - String + - + + * - first_on_demand + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + + * - log_analytics_info + - Map + - Defines values necessary to configure and run Azure Log Analytics agent + + * - spot_bid_max_price + - Any + - The max bid price to be used for Azure spot instances. The Max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should > 0 or -1. + + ### targets.resources.clusters.azure_attributes.log_analytics_info Defines values necessary to configure and run Azure Log Analytics agent + #### Attributes -| Key | Type | Description | -|------------------------------|--------|-----------------------| -| `log_analytics_primary_key` | String | | -| `log_analytics_workspace_id` | String | | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - log_analytics_primary_key + - String + - + + * - log_analytics_workspace_id + - String + - + + ### targets.resources.clusters.cluster_log_conf The configuration for delivering spark logs to a long-term storage destination. Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. + #### Attributes -| Key | Type | Description | -|--------|------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `dbfs` | Map | destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` | -| `s3` | Map | destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - dbfs + - Map + - destination needs to be provided. e.g. 
`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + + * - s3 + - Map + - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. + + ### targets.resources.clusters.cluster_log_conf.dbfs destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + #### Attributes -| Key | Type | Description | -|---------------|--------|----------------------------------------| -| `destination` | String | dbfs destination, e.g. `dbfs:/my/path` | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - destination + - String + - dbfs destination, e.g. `dbfs:/my/path` + + ### targets.resources.clusters.cluster_log_conf.s3 destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. + #### Attributes -| Key | Type | Description | -|---------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `canned_acl` | String | (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. | -| `destination` | String | S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. | -| `enable_encryption` | Boolean | (Optional) Flag to enable server side encryption, `false` by default. | -| `encryption_type` | String | (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. | -| `endpoint` | String | S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. | -| `kms_key` | String | (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. | -| `region` | String | S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. | - + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - canned_acl + - String + - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. + + * - destination + - String + - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. + + * - enable_encryption + - Boolean + - (Optional) Flag to enable server side encryption, `false` by default. + + * - encryption_type + - String + - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. + + * - endpoint + - String + - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + + * - kms_key + - String + - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + + * - region + - String + - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + + ### targets.resources.clusters.custom_tags Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: @@ -1142,40 +3391,98 @@ instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + + ### targets.resources.clusters.docker_image + #### Attributes -| Key | Type | Description | -|--------------|--------|--------------------------| -| `basic_auth` | Map | - | -| `url` | String | URL of the docker image. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - basic_auth + - Map + - + + * - url + - String + - URL of the docker image. + + ### targets.resources.clusters.docker_image.basic_auth + #### Attributes -| Key | Type | Description | -|------------|--------|----------------------| -| `password` | String | Password of the user | -| `username` | String | Name of the user | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - password + - String + - Password of the user + + * - username + - String + - Name of the user + + ### targets.resources.clusters.gcp_attributes Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. 
+ #### Attributes -| Key | Type | Description | -|-----------------------------|---------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `availability` | String | - | -| `boot_disk_size` | Integer | boot disk size in GB | -| `google_service_account` | String | If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). The google service account must have previously been added to the Databricks environment by an account administrator. | -| `local_ssd_count` | Integer | If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. | -| `use_preemptible_executors` | Boolean | This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default). Note: Soon to be deprecated, use the availability field instead. | -| `zone_id` | String | Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default] - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - availability + - String + - + + * - boot_disk_size + - Integer + - boot disk size in GB + + * - google_service_account + - String + - If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). The google service account must have previously been added to the Databricks environment by an account administrator. + + * - local_ssd_count + - Integer + - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. + + * - use_preemptible_executors + - Boolean + - This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default). Note: Soon to be deprecated, use the availability field instead. + + * - zone_id + - String + - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default] - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. 
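For readers of the generated reference, here is a minimal sketch of how the `gcp_attributes` settings documented above might be set on a cluster inside a target override. The target name `dev`, the cluster key `my_cluster`, and all values shown are illustrative placeholders, not values taken from this patch or its schema; in particular the `availability` value is an assumed enum name.

```yaml
# Hypothetical databricks.yml snippet: gcp_attributes on a target-level cluster
# definition. All names and values below are placeholders for illustration only.
targets:
  dev:
    resources:
      clusters:
        my_cluster:
          spark_version: 15.4.x-scala2.12
          node_type_id: n2-highmem-4
          num_workers: 2
          gcp_attributes:
            availability: PREEMPTIBLE_WITH_FALLBACK_GCP  # assumed enum value
            zone_id: AUTO             # let Databricks pick the availability zone
            local_ssd_count: 1        # each local SSD is 375GB in size
            boot_disk_size: 100       # boot disk size in GB
```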
+ + ### targets.resources.clusters.spark_conf An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. + + ### targets.resources.clusters.spark_env_vars An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., @@ -1188,725 +3495,2216 @@ default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` + + ### targets.resources.clusters.workload_type + #### Attributes -| Key | Type | Description | -|-----------|------|-------------------------------------------------------------------------| -| `clients` | Map | defined what type of clients can use the cluster. E.g. Notebooks, Jobs | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - clients + - Map + - defined what type of clients can use the cluster. E.g. Notebooks, Jobs + + ### targets.resources.clusters.workload_type.clients defined what type of clients can use the cluster. E.g. Notebooks, Jobs + #### Attributes -| Key | Type | Description | -|-------------|---------|------------------------------------------------------------| -| `jobs` | Boolean | With jobs set, the cluster can be used for jobs | -| `notebooks` | Boolean | With notebooks set, this cluster can be used for notebooks | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - jobs + - Boolean + - With jobs set, the cluster can be used for jobs + + * - notebooks + - Boolean + - With notebooks set, this cluster can be used for notebooks + + ### targets.resources.dashboards The dashboard definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|------------------------|----------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `create_time` | String | The timestamp of when the dashboard was created. | -| `dashboard_id` | String | UUID identifying the dashboard. | -| `display_name` | String | The display name of the dashboard. | -| `embed_credentials` | Boolean | - | -| `etag` | String | The etag for the dashboard. Can be optionally provided on updates to ensure that the dashboard has not been modified since the last read. This field is excluded in List Dashboards responses. | -| `file_path` | String | - | -| `lifecycle_state` | String | The state of the dashboard resource. Used for tracking trashed status. | -| `parent_path` | String | The workspace path of the folder containing the dashboard. Includes leading slash and no trailing slash. 
This field is excluded in List Dashboards responses. | -| `path` | String | The workspace path of the dashboard asset, including the file name. Exported dashboards always have the file extension `.lvdash.json`. This field is excluded in List Dashboards responses. | -| `permissions` | Sequence | - | -| `serialized_dashboard` | Any | The contents of the dashboard in serialized string form. This field is excluded in List Dashboards responses. Use the [get dashboard API](https://docs.databricks.com/api/workspace/lakeview/get) to retrieve an example response, which includes the `serialized_dashboard` field. This field provides the structure of the JSON string that represents the dashboard's layout and components. | -| `update_time` | String | The timestamp of when the dashboard was last updated by the user. This field is excluded in List Dashboards responses. | -| `warehouse_id` | String | The warehouse ID used to run the dashboard. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - create_time + - String + - The timestamp of when the dashboard was created. + + * - dashboard_id + - String + - UUID identifying the dashboard. + + * - display_name + - String + - The display name of the dashboard. + + * - embed_credentials + - Boolean + - + + * - etag + - String + - The etag for the dashboard. Can be optionally provided on updates to ensure that the dashboard has not been modified since the last read. This field is excluded in List Dashboards responses. + + * - file_path + - String + - + + * - lifecycle_state + - String + - The state of the dashboard resource. Used for tracking trashed status. + + * - parent_path + - String + - The workspace path of the folder containing the dashboard. Includes leading slash and no trailing slash. This field is excluded in List Dashboards responses. + + * - path + - String + - The workspace path of the dashboard asset, including the file name. Exported dashboards always have the file extension `.lvdash.json`. This field is excluded in List Dashboards responses. + + * - permissions + - Sequence + - + + * - serialized_dashboard + - Any + - The contents of the dashboard in serialized string form. This field is excluded in List Dashboards responses. Use the [get dashboard API](https://docs.databricks.com/api/workspace/lakeview/get) to retrieve an example response, which includes the `serialized_dashboard` field. This field provides the structure of the JSON string that represents the dashboard's layout and components. + + * - update_time + - String + - The timestamp of when the dashboard was last updated by the user. This field is excluded in List Dashboards responses. + + * - warehouse_id + - String + - The warehouse ID used to run the dashboard. + + ### targets.resources.experiments The experiment definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|---------------------|----------|------------------------------------------------------------------------------------------------------------------| -| `artifact_location` | String | Location where artifacts for the experiment are stored. | -| `creation_time` | Integer | Creation time | -| `experiment_id` | String | Unique identifier for the experiment. 
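A minimal sketch of the `targets.resources.dashboards` map described above; the resource key `sales_dashboard`, the local file path, and the variable reference are assumptions for illustration, not values from the schema.

```yaml
targets:
  dev:
    resources:
      dashboards:
        sales_dashboard:                                  # assumed resource key
          display_name: "Sales Dashboard"
          file_path: ./src/sales_dashboard.lvdash.json    # assumed path to the exported dashboard
          warehouse_id: ${var.warehouse_id}               # assumed bundle variable
```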
| -| `last_update_time` | Integer | Last update time | -| `lifecycle_stage` | String | Current life cycle stage of the experiment: "active" or "deleted". Deleted experiments are not returned by APIs. | -| `name` | String | Human readable name that identifies the experiment. | -| `permissions` | Sequence | - | -| `tags` | Sequence | Tags: Additional metadata key-value pairs. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - artifact_location + - String + - Location where artifacts for the experiment are stored. + + * - creation_time + - Integer + - Creation time + + * - experiment_id + - String + - Unique identifier for the experiment. + + * - last_update_time + - Integer + - Last update time + + * - lifecycle_stage + - String + - Current life cycle stage of the experiment: "active" or "deleted". Deleted experiments are not returned by APIs. + + * - name + - String + - Human readable name that identifies the experiment. + + * - permissions + - Sequence + - + + * - tags + - Sequence + - Tags: Additional metadata key-value pairs. + + ### targets.resources.jobs The job definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|-------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `budget_policy_id` | String | The id of the user specified budget policy to use for this job. If not specified, a default budget policy may be applied when creating or modifying the job. See `effective_budget_policy_id` for the budget policy used by this workload. | -| `continuous` | Map | An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. | -| `description` | String | An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding. | -| `email_notifications` | Map | An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. | -| `environments` | Sequence | A list of task execution environment specifications that can be referenced by serverless tasks of this job. An environment is required to be present for serverless tasks. For serverless notebook tasks, the environment is accessible in the notebook environment panel. For other serverless tasks, the task environment is required to be specified using environment_key in the task settings. 
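For the `targets.resources.experiments` map documented above, a sketch of a single experiment definition; the resource key and the workspace path are assumptions.

```yaml
targets:
  dev:
    resources:
      experiments:
        churn_experiment:                                    # assumed resource key
          name: /Users/someone@example.com/churn-experiment  # assumed workspace path
```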
| -| `git_source` | Map | An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. | -| `health` | Map | - | -| `job_clusters` | Sequence | A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. | -| `max_concurrent_runs` | Integer | An optional maximum allowed number of concurrent runs of the job. Set this value if you want to be able to execute multiple runs of the same job concurrently. This is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters. This setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs. However, from then on, new runs are skipped unless there are fewer than 3 active runs. This value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped. | -| `name` | String | An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding. | -| `notification_settings` | Map | Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. | -| `parameters` | Sequence | Job-level parameter definitions | -| `permissions` | Sequence | - | -| `queue` | Map | The queue settings of the job. | -| `run_as` | Map | - | -| `schedule` | Map | An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. | -| `tags` | Map | A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. | -| `tasks` | Sequence | A list of task specifications to be executed by this job. | -| `timeout_seconds` | Integer | An optional timeout applied to each run of this job. A value of `0` means no timeout. | -| `trigger` | Map | A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. | -| `webhook_notifications` | Map | A collection of system notification IDs to notify when runs of this job begin or complete. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - budget_policy_id + - String + - The id of the user specified budget policy to use for this job. If not specified, a default budget policy may be applied when creating or modifying the job. See `effective_budget_policy_id` for the budget policy used by this workload. + + * - continuous + - Map + - An optional continuous property for this job. 
The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. + + * - description + - String + - An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding. + + * - email_notifications + - Map + - An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. + + * - environments + - Sequence + - A list of task execution environment specifications that can be referenced by serverless tasks of this job. An environment is required to be present for serverless tasks. For serverless notebook tasks, the environment is accessible in the notebook environment panel. For other serverless tasks, the task environment is required to be specified using environment_key in the task settings. + + * - git_source + - Map + - An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. + + * - health + - Map + - + + * - job_clusters + - Sequence + - A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. + + * - max_concurrent_runs + - Integer + - An optional maximum allowed number of concurrent runs of the job. Set this value if you want to be able to execute multiple runs of the same job concurrently. This is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters. This setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs. However, from then on, new runs are skipped unless there are fewer than 3 active runs. This value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped. + + * - name + - String + - An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding. + + * - notification_settings + - Map + - Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. + + * - parameters + - Sequence + - Job-level parameter definitions + + * - permissions + - Sequence + - + + * - queue + - Map + - The queue settings of the job. + + * - run_as + - Map + - + + * - schedule + - Map + - An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + + * - tags + - Map + - A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. + + * - tasks + - Sequence + - A list of task specifications to be executed by this job. 
+ + * - timeout_seconds + - Integer + - An optional timeout applied to each run of this job. A value of `0` means no timeout. + + * - trigger + - Map + - A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + + * - webhook_notifications + - Map + - A collection of system notification IDs to notify when runs of this job begin or complete. + + ### targets.resources.jobs.continuous An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. + #### Attributes -| Key | Type | Description | -|----------------|--------|----------------------------------------------------------------------------------------------| -| `pause_status` | String | Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - pause_status + - String + - Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED. + + ### targets.resources.jobs.email_notifications An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. + #### Attributes -| Key | Type | Description | -|------------------------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `no_alert_for_skipped_runs` | Boolean | If true, do not send email to recipients specified in `on_failure` if the run is skipped. This field is `deprecated`. Please use the `notification_settings.no_alert_for_skipped_runs` field. | -| `on_duration_warning_threshold_exceeded` | Sequence | A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. | -| `on_failure` | Sequence | A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. | -| `on_start` | Sequence | A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. | -| `on_streaming_backlog_exceeded` | Sequence | A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. 
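Pulling together several of the job-level fields documented above (`name`, `max_concurrent_runs`, `timeout_seconds`, `queue`, `email_notifications`, `tasks`), a sketch of a job definition. The resource key, notebook path, and email address are assumptions, and the task fields (`task_key`, `notebook_task`) come from the Jobs API rather than from the excerpt above.

```yaml
targets:
  dev:
    resources:
      jobs:
        nightly_etl:                       # assumed resource key
          name: nightly-etl
          max_concurrent_runs: 1
          timeout_seconds: 0               # 0 means no timeout
          queue:
            enabled: true
          email_notifications:
            on_failure:
              - ops@example.com            # assumed address
          tasks:
            - task_key: main               # task fields are from the Jobs API, not shown above
              notebook_task:
                notebook_path: ./src/etl.ipynb
```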
If the issue persists, notifications are resent every 30 minutes. | -| `on_success` | Sequence | A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - no_alert_for_skipped_runs + - Boolean + - If true, do not send email to recipients specified in `on_failure` if the run is skipped. This field is `deprecated`. Please use the `notification_settings.no_alert_for_skipped_runs` field. + + * - on_duration_warning_threshold_exceeded + - Sequence + - A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. + + * - on_failure + - Sequence + - A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. + + * - on_start + - Sequence + - A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. + + * - on_streaming_backlog_exceeded + - Sequence + - A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. + + * - on_success + - Sequence + - A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. + + ### targets.resources.jobs.git_source An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. + #### Attributes -| Key | Type | Description | -|----------------|--------|--------------------------------------------------------------------------------------------------------------------------------------| -| `git_branch` | String | Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit. | -| `git_commit` | String | Commit to be checked out and used by this job. 
This field cannot be specified in conjunction with git_branch or git_tag. | -| `git_provider` | String | Unique identifier of the service used to host the Git repository. The value is case insensitive. | -| `git_tag` | String | Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit. | -| `git_url` | String | URL of the repository to be cloned by this job. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - git_branch + - String + - Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit. + + * - git_commit + - String + - Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag. + + * - git_provider + - String + - Unique identifier of the service used to host the Git repository. The value is case insensitive. + + * - git_tag + - String + - Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit. + + * - git_url + - String + - URL of the repository to be cloned by this job. + + ### targets.resources.jobs.health + #### Attributes -| Key | Type | Description | -|---------|----------|-------------| -| `rules` | Sequence | - | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - rules + - Sequence + - + + ### targets.resources.jobs.notification_settings Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. + #### Attributes -| Key | Type | Description | -|------------------------------|---------|----------------------------------------------------------------------------------------------------| -| `no_alert_for_canceled_runs` | Boolean | If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. | -| `no_alert_for_skipped_runs` | Boolean | If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - no_alert_for_canceled_runs + - Boolean + - If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. + + * - no_alert_for_skipped_runs + - Boolean + - If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. + + ### targets.resources.jobs.queue The queue settings of the job. + #### Attributes -| Key | Type | Description | -|-----------|---------|-----------------------------------------------------------------| -| `enabled` | Boolean | If true, enable queueing for the job. This is a required field. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - enabled + - Boolean + - If true, enable queueing for the job. This is a required field. + + ### targets.resources.jobs.run_as + #### Attributes -| Key | Type | Description | -|--------------------------|--------|--------------------------------------------------------------------------------------------------------------| -| `service_principal_name` | String | Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. | -| `user_name` | String | The email of an active workspace user. Non-admin users can only set this field to their own email. | - + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - service_principal_name + - String + - Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. + + * - user_name + - String + - The email of an active workspace user. Non-admin users can only set this field to their own email. + + ### targets.resources.jobs.schedule An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + #### Attributes -| Key | Type | Description | -|--------------------------|--------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `pause_status` | String | Indicate whether this schedule is paused or not. | -| `quartz_cron_expression` | String | A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required. | -| `timezone_id` | String | A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - pause_status + - String + - Indicate whether this schedule is paused or not. + + * - quartz_cron_expression + - String + - A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required. + + * - timezone_id + - String + - A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required. + + ### targets.resources.jobs.tags A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. + + ### targets.resources.jobs.trigger A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + #### Attributes -| Key | Type | Description | -|----------------|--------|-------------------------------------------------------------------------| -| `file_arrival` | Map | File arrival trigger settings. | -| `pause_status` | String | Whether this trigger is paused or not. | -| `periodic` | Map | Periodic trigger settings. | -| `table` | Map | Old table trigger settings name. Deprecated in favor of `table_update`. | -| `table_update` | Map | - | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - file_arrival + - Map + - File arrival trigger settings. + + * - pause_status + - String + - Whether this trigger is paused or not. + + * - periodic + - Map + - Periodic trigger settings. + + * - table + - Map + - Old table trigger settings name. Deprecated in favor of `table_update`. 
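The `run_as`, `schedule`, and `tags` settings documented above might be combined as in the following sketch; the target name, resource key, service principal application ID, and tag values are assumptions.

```yaml
targets:
  prod:
    resources:
      jobs:
        nightly_etl:                       # assumed resource key
          run_as:
            service_principal_name: "00000000-0000-0000-0000-000000000000"   # assumed application ID
          schedule:
            quartz_cron_expression: "0 0 2 * * ?"
            timezone_id: UTC
            pause_status: UNPAUSED
          tags:
            team: data-platform            # assumed tag
```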
+ + * - table_update + - Map + - + + ### targets.resources.jobs.trigger.file_arrival File arrival trigger settings. + #### Attributes -| Key | Type | Description | -|-------------------------------------|---------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `min_time_between_triggers_seconds` | Integer | If set, the trigger starts a run only after the specified amount of time passed since the last time the trigger fired. The minimum allowed value is 60 seconds | -| `url` | String | URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location. | -| `wait_after_last_change_seconds` | Integer | If set, the trigger starts a run only after no file activity has occurred for the specified amount of time. This makes it possible to wait for a batch of incoming files to arrive before triggering a run. The minimum allowed value is 60 seconds. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - min_time_between_triggers_seconds + - Integer + - If set, the trigger starts a run only after the specified amount of time passed since the last time the trigger fired. The minimum allowed value is 60 seconds + + * - url + - String + - URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location. + + * - wait_after_last_change_seconds + - Integer + - If set, the trigger starts a run only after no file activity has occurred for the specified amount of time. This makes it possible to wait for a batch of incoming files to arrive before triggering a run. The minimum allowed value is 60 seconds. + + ### targets.resources.jobs.trigger.periodic Periodic trigger settings. + #### Attributes -| Key | Type | Description | -|------------|---------|-----------------------------------------------| -| `interval` | Integer | The interval at which the trigger should run. | -| `unit` | String | The unit of time for the interval. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - interval + - Integer + - The interval at which the trigger should run. + + * - unit + - String + - The unit of time for the interval. + + ### targets.resources.jobs.trigger.table Old table trigger settings name. Deprecated in favor of `table_update`. + #### Attributes -| Key | Type | Description | -|-------------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `condition` | String | The table(s) condition based on which to trigger a job run. | -| `min_time_between_triggers_seconds` | Integer | If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. | -| `table_names` | Sequence | A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. | -| `wait_after_last_change_seconds` | Integer | If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. The minimum allowed value is 60 seconds. 
| - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - condition + - String + - The table(s) condition based on which to trigger a job run. + + * - min_time_between_triggers_seconds + - Integer + - If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. + + * - table_names + - Sequence + - A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. + + * - wait_after_last_change_seconds + - Integer + - If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. The minimum allowed value is 60 seconds. + + ### targets.resources.jobs.trigger.table_update + #### Attributes -| Key | Type | Description | -|-------------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `condition` | String | The table(s) condition based on which to trigger a job run. | -| `min_time_between_triggers_seconds` | Integer | If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. | -| `table_names` | Sequence | A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. | -| `wait_after_last_change_seconds` | Integer | If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. The minimum allowed value is 60 seconds. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - condition + - String + - The table(s) condition based on which to trigger a job run. + + * - min_time_between_triggers_seconds + - Integer + - If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. + + * - table_names + - Sequence + - A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. + + * - wait_after_last_change_seconds + - Integer + - If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. The minimum allowed value is 60 seconds. + + ### targets.resources.jobs.webhook_notifications A collection of system notification IDs to notify when runs of this job begin or complete. 
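The trigger settings documented above (`trigger` and `trigger.file_arrival`) might look like the following sketch; the resource key and the monitored URL are assumptions.

```yaml
targets:
  prod:
    resources:
      jobs:
        ingest_job:                        # assumed resource key
          trigger:
            pause_status: UNPAUSED
            file_arrival:
              url: gs://my-bucket/landing/               # assumed external location path
              min_time_between_triggers_seconds: 60
              wait_after_last_change_seconds: 120
```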
+ #### Attributes -| Key | Type | Description | -|------------------------------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `on_duration_warning_threshold_exceeded` | Sequence | An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. | -| `on_failure` | Sequence | An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. | -| `on_start` | Sequence | An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. | -| `on_streaming_backlog_exceeded` | Sequence | An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. | -| `on_success` | Sequence | An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - on_duration_warning_threshold_exceeded + - Sequence + - An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. + + * - on_failure + - Sequence + - An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. + + * - on_start + - Sequence + - An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. + + * - on_streaming_backlog_exceeded + - Sequence + - An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. 
+ + * - on_success + - Sequence + - An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. + + ### targets.resources.model_serving_endpoints The model serving endpoint definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|-------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `ai_gateway` | Map | The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are supported as of now. | -| `config` | Map | The core config of the serving endpoint. | -| `name` | String | The name of the serving endpoint. This field is required and must be unique across a Databricks workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. | -| `permissions` | Sequence | - | -| `rate_limits` | Sequence | Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI Gateway to manage rate limits. | -| `route_optimized` | Boolean | Enable route optimization for the serving endpoint. | -| `tags` | Sequence | Tags to be attached to the serving endpoint and automatically propagated to billing logs. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - ai_gateway + - Map + - The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are supported as of now. + + * - config + - Map + - The core config of the serving endpoint. + + * - name + - String + - The name of the serving endpoint. This field is required and must be unique across a Databricks workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. + + * - permissions + - Sequence + - + + * - rate_limits + - Sequence + - Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI Gateway to manage rate limits. + + * - route_optimized + - Boolean + - Enable route optimization for the serving endpoint. + + * - tags + - Sequence + - Tags to be attached to the serving endpoint and automatically propagated to billing logs. + + ### targets.resources.model_serving_endpoints.ai_gateway The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are supported as of now. + #### Attributes -| Key | Type | Description | -|--------------------------|----------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `guardrails` | Map | Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. | -| `inference_table_config` | Map | Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. | -| `rate_limits` | Sequence | Configuration for rate limits which can be set to limit endpoint traffic. 
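A sketch of the `targets.resources.model_serving_endpoints` map documented above, using the `name`, `route_optimized`, and `config` fields. The resource key and model name are assumptions, and the served-entity fields come from the Serving API rather than from the excerpt above.

```yaml
targets:
  prod:
    resources:
      model_serving_endpoints:
        churn_endpoint:                    # assumed resource key
          name: churn-endpoint
          route_optimized: false
          config:
            served_entities:               # served-entity fields are from the Serving API, not shown above
              - entity_name: main.models.churn_model   # assumed Unity Catalog model name
                entity_version: "1"
                workload_size: Small
                scale_to_zero_enabled: true
```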
| -| `usage_tracking_config` | Map | Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - guardrails + - Map + - Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. + + * - inference_table_config + - Map + - Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. + + * - rate_limits + - Sequence + - Configuration for rate limits which can be set to limit endpoint traffic. + + * - usage_tracking_config + - Map + - Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. + + ### targets.resources.model_serving_endpoints.ai_gateway.guardrails Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. + #### Attributes -| Key | Type | Description | -|----------|------|---------------------------------------------| -| `input` | Map | Configuration for input guardrail filters. | -| `output` | Map | Configuration for output guardrail filters. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - input + - Map + - Configuration for input guardrail filters. + + * - output + - Map + - Configuration for output guardrail filters. + + ### targets.resources.model_serving_endpoints.ai_gateway.guardrails.input Configuration for input guardrail filters. + #### Attributes -| Key | Type | Description | -|--------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------| -| `invalid_keywords` | Sequence | List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. | -| `pii` | Map | Configuration for guardrail PII filter. | -| `safety` | Boolean | Indicates whether the safety filter is enabled. | -| `valid_topics` | Sequence | The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - invalid_keywords + - Sequence + - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. + + * - pii + - Map + - Configuration for guardrail PII filter. + + * - safety + - Boolean + - Indicates whether the safety filter is enabled. + + * - valid_topics + - Sequence + - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. + + ### targets.resources.model_serving_endpoints.ai_gateway.guardrails.input.pii Configuration for guardrail PII filter. + #### Attributes -| Key | Type | Description | -|------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `behavior` | String | Behavior for PII filter. 
Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - behavior + - String + - Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. + + ### targets.resources.model_serving_endpoints.ai_gateway.guardrails.output Configuration for output guardrail filters. + #### Attributes -| Key | Type | Description | -|--------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------| -| `invalid_keywords` | Sequence | List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. | -| `pii` | Map | Configuration for guardrail PII filter. | -| `safety` | Boolean | Indicates whether the safety filter is enabled. | -| `valid_topics` | Sequence | The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - invalid_keywords + - Sequence + - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. + + * - pii + - Map + - Configuration for guardrail PII filter. + + * - safety + - Boolean + - Indicates whether the safety filter is enabled. + + * - valid_topics + - Sequence + - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. + + ### targets.resources.model_serving_endpoints.ai_gateway.guardrails.output.pii Configuration for guardrail PII filter. + #### Attributes -| Key | Type | Description | -|------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `behavior` | String | Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - behavior + - String + - Behavior for PII filter. Currently only 'BLOCK' is supported. 
If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. + + ### targets.resources.model_serving_endpoints.ai_gateway.inference_table_config Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. + #### Attributes -| Key | Type | Description | -|---------------------|---------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `catalog_name` | String | The name of the catalog in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the catalog name. | -| `enabled` | Boolean | Indicates whether the inference table is enabled. | -| `schema_name` | String | The name of the schema in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the schema name. | -| `table_name_prefix` | String | The prefix of the table in Unity Catalog. NOTE: On update, you have to disable inference table first in order to change the prefix name. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - catalog_name + - String + - The name of the catalog in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the catalog name. + + * - enabled + - Boolean + - Indicates whether the inference table is enabled. + + * - schema_name + - String + - The name of the schema in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the schema name. + + * - table_name_prefix + - String + - The prefix of the table in Unity Catalog. NOTE: On update, you have to disable inference table first in order to change the prefix name. + + ### targets.resources.model_serving_endpoints.ai_gateway.usage_tracking_config Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. + #### Attributes -| Key | Type | Description | -|-----------|---------|-----------------------------------| -| `enabled` | Boolean | Whether to enable usage tracking. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - enabled + - Boolean + - Whether to enable usage tracking. + + ### targets.resources.model_serving_endpoints.config The core config of the serving endpoint. + #### Attributes -| Key | Type | Description | -|-----------------------|----------|--------------------------------------------------------------------------------------------------------------------------------------------------| -| `auto_capture_config` | Map | Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. | -| `served_entities` | Sequence | A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served entities. | -| `served_models` | Sequence | (Deprecated, use served_entities instead) A list of served models for the endpoint to serve. 
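The AI Gateway settings documented above (`guardrails`, `inference_table_config`, `usage_tracking_config`) could be combined on an endpoint as in this sketch; the resource key and the catalog and schema names are assumptions.

```yaml
targets:
  prod:
    resources:
      model_serving_endpoints:
        churn_endpoint:                    # assumed resource key
          ai_gateway:
            usage_tracking_config:
              enabled: true
            inference_table_config:
              enabled: true
              catalog_name: main           # assumed catalog
              schema_name: serving_logs    # assumed schema
            guardrails:
              input:
                safety: true
                pii:
                  behavior: BLOCK
```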
A serving endpoint can have up to 15 served models. | -| `traffic_config` | Map | The traffic config defining how invocations to the serving endpoint should be routed. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - auto_capture_config + - Map + - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. + + * - served_entities + - Sequence + - A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served entities. + + * - served_models + - Sequence + - (Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A serving endpoint can have up to 15 served models. + + * - traffic_config + - Map + - The traffic config defining how invocations to the serving endpoint should be routed. + + ### targets.resources.model_serving_endpoints.config.auto_capture_config Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. + #### Attributes -| Key | Type | Description | -|---------------------|---------|------------------------------------------------------------------------------------------------------------------------------------------| -| `catalog_name` | String | The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled. | -| `enabled` | Boolean | Indicates whether the inference table is enabled. | -| `schema_name` | String | The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if the inference table is already enabled. | -| `table_name_prefix` | String | The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - catalog_name + - String + - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled. + + * - enabled + - Boolean + - Indicates whether the inference table is enabled. + + * - schema_name + - String + - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if the inference table is already enabled. + + * - table_name_prefix + - String + - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled. + + ### targets.resources.model_serving_endpoints.config.traffic_config The traffic config defining how invocations to the serving endpoint should be routed. + #### Attributes -| Key | Type | Description | -|----------|----------|---------------------------------------------------------------| -| `routes` | Sequence | The list of routes that define traffic to each served entity. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - routes + - Sequence + - The list of routes that define traffic to each served entity. + + ### targets.resources.models The model definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|--------------------------|----------|-------------------------------------------------------------------------------------------------------| -| `creation_timestamp` | Integer | Timestamp recorded when this `registered_model` was created. | -| `description` | String | Description of this `registered_model`. | -| `last_updated_timestamp` | Integer | Timestamp recorded when metadata for this `registered_model` was last updated. | -| `latest_versions` | Sequence | Collection of latest model versions for each stage. Only contains models with current `READY` status. | -| `name` | String | Unique name for the model. | -| `permissions` | Sequence | - | -| `tags` | Sequence | Tags: Additional metadata key-value pairs for this `registered_model`. | -| `user_id` | String | User that created this `registered_model` | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - creation_timestamp + - Integer + - Timestamp recorded when this `registered_model` was created. + + * - description + - String + - Description of this `registered_model`. + + * - last_updated_timestamp + - Integer + - Timestamp recorded when metadata for this `registered_model` was last updated. + + * - latest_versions + - Sequence + - Collection of latest model versions for each stage. Only contains models with current `READY` status. + + * - name + - String + - Unique name for the model. + + * - permissions + - Sequence + - + + * - tags + - Sequence + - Tags: Additional metadata key-value pairs for this `registered_model`. + + * - user_id + - String + - User that created this `registered_model` + + ### targets.resources.pipelines The pipeline definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `budget_policy_id` | String | Budget policy of this pipeline. | -| `catalog` | String | A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. | -| `channel` | String | DLT Release Channel that specifies which version to use. | -| `clusters` | Sequence | Cluster settings for this pipeline deployment. | -| `configuration` | Map | String-String configuration for this pipeline execution. | -| `continuous` | Boolean | Whether the pipeline is continuous or triggered. This replaces `trigger`. | -| `deployment` | Map | Deployment type of this pipeline. | -| `development` | Boolean | Whether the pipeline is in Development mode. Defaults to false. | -| `edition` | String | Pipeline product edition. | -| `filters` | Map | Filters on which Pipeline packages to include in the deployed graph. 
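A minimal sketch of the `targets.resources.models` map documented above; the resource key, model name, and description are assumptions.

```yaml
targets:
  dev:
    resources:
      models:
        churn_model:                       # assumed resource key
          name: churn-model
          description: "Churn prediction model managed by this bundle."
```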
| -| `gateway_definition` | Map | The definition of a gateway pipeline to support change data capture. | -| `id` | String | Unique identifier for this pipeline. | -| `ingestion_definition` | Map | The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings. | -| `libraries` | Sequence | Libraries or code needed by this deployment. | -| `name` | String | Friendly identifier for this pipeline. | -| `notifications` | Sequence | List of notification settings for this pipeline. | -| `permissions` | Sequence | - | -| `photon` | Boolean | Whether Photon is enabled for this pipeline. | -| `restart_window` | Map | Restart window of this pipeline. | -| `schema` | String | The default schema (database) where tables are read from or published to. The presence of this field implies that the pipeline is in direct publishing mode. | -| `serverless` | Boolean | Whether serverless compute is enabled for this pipeline. | -| `storage` | String | DBFS root directory for storing checkpoints and tables. | -| `target` | String | Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. | -| `trigger` | Map | Which pipeline trigger to use. Deprecated: Use `continuous` instead. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - budget_policy_id + - String + - Budget policy of this pipeline. + + * - catalog + - String + - A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. + + * - channel + - String + - DLT Release Channel that specifies which version to use. + + * - clusters + - Sequence + - Cluster settings for this pipeline deployment. + + * - configuration + - Map + - String-String configuration for this pipeline execution. + + * - continuous + - Boolean + - Whether the pipeline is continuous or triggered. This replaces `trigger`. + + * - deployment + - Map + - Deployment type of this pipeline. + + * - development + - Boolean + - Whether the pipeline is in Development mode. Defaults to false. + + * - edition + - String + - Pipeline product edition. + + * - filters + - Map + - Filters on which Pipeline packages to include in the deployed graph. + + * - gateway_definition + - Map + - The definition of a gateway pipeline to support change data capture. + + * - id + - String + - Unique identifier for this pipeline. + + * - ingestion_definition + - Map + - The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings. + + * - libraries + - Sequence + - Libraries or code needed by this deployment. + + * - name + - String + - Friendly identifier for this pipeline. + + * - notifications + - Sequence + - List of notification settings for this pipeline. + + * - permissions + - Sequence + - + + * - photon + - Boolean + - Whether Photon is enabled for this pipeline. + + * - restart_window + - Map + - Restart window of this pipeline. + + * - schema + - String + - The default schema (database) where tables are read from or published to. The presence of this field implies that the pipeline is in direct publishing mode. 
+ + * - serverless + - Boolean + - Whether serverless compute is enabled for this pipeline. + + * - storage + - String + - DBFS root directory for storing checkpoints and tables. + + * - target + - String + - Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. + + * - trigger + - Map + - Which pipeline trigger to use. Deprecated: Use `continuous` instead. + + ### targets.resources.pipelines.configuration String-String configuration for this pipeline execution. + + ### targets.resources.pipelines.deployment Deployment type of this pipeline. + #### Attributes -| Key | Type | Description | -|----------------------|--------|----------------------------------------------------------------| -| `kind` | String | The deployment method that manages the pipeline. | -| `metadata_file_path` | String | The path to the file containing metadata about the deployment. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - kind + - String + - The deployment method that manages the pipeline. + + * - metadata_file_path + - String + - The path to the file containing metadata about the deployment. + + ### targets.resources.pipelines.filters Filters on which Pipeline packages to include in the deployed graph. + #### Attributes -| Key | Type | Description | -|-----------|----------|-------------------| -| `exclude` | Sequence | Paths to exclude. | -| `include` | Sequence | Paths to include. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - exclude + - Sequence + - Paths to exclude. + + * - include + - Sequence + - Paths to include. + + ### targets.resources.pipelines.gateway_definition The definition of a gateway pipeline to support change data capture. + #### Attributes -| Key | Type | Description | -|---------------------------|--------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `connection_id` | String | [Deprecated, use connection_name instead] Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. | -| `connection_name` | String | Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. | -| `gateway_storage_catalog` | String | Required, Immutable. The name of the catalog for the gateway pipeline's storage location. | -| `gateway_storage_name` | String | Optional. The Unity Catalog-compatible name for the gateway storage location. This is the destination to use for the data that is extracted by the gateway. Delta Live Tables system will automatically create the storage location under the catalog and schema. | -| `gateway_storage_schema` | String | Required, Immutable. The name of the schema for the gateway pipelines's storage location. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - connection_id + - String + - [Deprecated, use connection_name instead] Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. + + * - connection_name + - String + - Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. 
+ + * - gateway_storage_catalog + - String + - Required, Immutable. The name of the catalog for the gateway pipeline's storage location. + + * - gateway_storage_name + - String + - Optional. The Unity Catalog-compatible name for the gateway storage location. This is the destination to use for the data that is extracted by the gateway. Delta Live Tables system will automatically create the storage location under the catalog and schema. + + * - gateway_storage_schema + - String + - Required, Immutable. The name of the schema for the gateway pipelines's storage location. + + ### targets.resources.pipelines.ingestion_definition The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings. + #### Attributes -| Key | Type | Description | -|------------------------|----------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `connection_name` | String | Immutable. The Unity Catalog connection that this ingestion pipeline uses to communicate with the source. This is used with connectors for applications like Salesforce, Workday, and so on. | -| `ingestion_gateway_id` | String | Immutable. Identifier for the gateway that is used by this ingestion pipeline to communicate with the source database. This is used with connectors to databases like SQL Server. | -| `objects` | Sequence | Required. Settings specifying tables to replicate and the destination for the replicated tables. | -| `table_configuration` | Map | Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - connection_name + - String + - Immutable. The Unity Catalog connection that this ingestion pipeline uses to communicate with the source. This is used with connectors for applications like Salesforce, Workday, and so on. + + * - ingestion_gateway_id + - String + - Immutable. Identifier for the gateway that is used by this ingestion pipeline to communicate with the source database. This is used with connectors to databases like SQL Server. + + * - objects + - Sequence + - Required. Settings specifying tables to replicate and the destination for the replicated tables. + + * - table_configuration + - Map + - Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. + + ### targets.resources.pipelines.ingestion_definition.table_configuration Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. + #### Attributes -| Key | Type | Description | -|-------------------------------------|----------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `primary_keys` | Sequence | The primary key of the table used to apply changes. | -| `salesforce_include_formula_fields` | Boolean | If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector | -| `scd_type` | String | The SCD type to use to ingest the table. | -| `sequence_by` | Sequence | The column names specifying the logical order of events in the source data. 
Delta Live Tables uses this sequencing to handle change events that arrive out of order. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - primary_keys + - Sequence + - The primary key of the table used to apply changes. + + * - salesforce_include_formula_fields + - Boolean + - If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector + + * - scd_type + - String + - The SCD type to use to ingest the table. + + * - sequence_by + - Sequence + - The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. + + ### targets.resources.pipelines.restart_window Restart window of this pipeline. + #### Attributes -| Key | Type | Description | -|----------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `days_of_week` | String | Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). If not specified all days of the week will be used. | -| `start_hour` | Integer | An integer between 0 and 23 denoting the start hour for the restart window in the 24-hour day. Continuous pipeline restart is triggered only within a five-hour window starting at this hour. | -| `time_zone_id` | String | Time zone id of restart window. See https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details. If not specified, UTC will be used. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - days_of_week + - String + - Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). If not specified all days of the week will be used. + + * - start_hour + - Integer + - An integer between 0 and 23 denoting the start hour for the restart window in the 24-hour day. Continuous pipeline restart is triggered only within a five-hour window starting at this hour. + + * - time_zone_id + - String + - Time zone id of restart window. See https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details. If not specified, UTC will be used. + + ### targets.resources.pipelines.trigger Which pipeline trigger to use. Deprecated: Use `continuous` instead. + #### Attributes -| Key | Type | Description | -|----------|------|-------------| -| `cron` | Map | - | -| `manual` | Map | - | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - cron + - Map + - + + * - manual + - Map + - + + ### targets.resources.pipelines.trigger.cron + #### Attributes -| Key | Type | Description | -|------------------------|--------|-------------| -| `quartz_cron_schedule` | String | - | -| `timezone_id` | String | - | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - quartz_cron_schedule + - String + - + + * - timezone_id + - String + - + + ### targets.resources.pipelines.trigger.manual + + ### targets.resources.quality_monitors The quality monitor definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `assets_dir` | String | The directory to store monitoring assets (e.g. dashboard, metric tables). | -| `baseline_table_name` | String | Name of the baseline table from which drift metrics are computed from. Columns in the monitored table should also be present in the baseline table. | -| `custom_metrics` | Sequence | Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). | -| `data_classification_config` | Map | The data classification config for the monitor. | -| `inference_log` | Map | Configuration for monitoring inference logs. | -| `notifications` | Map | The notification settings for the monitor. | -| `output_schema_name` | String | Schema where output metric tables are created. | -| `schedule` | Map | The schedule for automatically updating and refreshing metric tables. | -| `skip_builtin_dashboard` | Boolean | Whether to skip creating a default dashboard summarizing data quality metrics. | -| `slicing_exprs` | Sequence | List of column expressions to slice data with for targeted analysis. The data is grouped by each expression independently, resulting in a separate slice for each predicate and its complements. For high-cardinality columns, only the top 100 unique values by frequency will generate slices. | -| `snapshot` | Map | Configuration for monitoring snapshot tables. | -| `table_name` | String | - | -| `time_series` | Map | Configuration for monitoring time series tables. | -| `warehouse_id` | String | Optional argument to specify the warehouse for dashboard creation. If not specified, the first running warehouse will be used. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - assets_dir + - String + - The directory to store monitoring assets (e.g. dashboard, metric tables). + + * - baseline_table_name + - String + - Name of the baseline table from which drift metrics are computed from. Columns in the monitored table should also be present in the baseline table. + + * - custom_metrics + - Sequence + - Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). + + * - data_classification_config + - Map + - The data classification config for the monitor. + + * - inference_log + - Map + - Configuration for monitoring inference logs. + + * - notifications + - Map + - The notification settings for the monitor. + + * - output_schema_name + - String + - Schema where output metric tables are created. + + * - schedule + - Map + - The schedule for automatically updating and refreshing metric tables. + + * - skip_builtin_dashboard + - Boolean + - Whether to skip creating a default dashboard summarizing data quality metrics. + + * - slicing_exprs + - Sequence + - List of column expressions to slice data with for targeted analysis. 
The data is grouped by each expression independently, resulting in a separate slice for each predicate and its complements. For high-cardinality columns, only the top 100 unique values by frequency will generate slices. + + * - snapshot + - Map + - Configuration for monitoring snapshot tables. + + * - table_name + - String + - + + * - time_series + - Map + - Configuration for monitoring time series tables. + + * - warehouse_id + - String + - Optional argument to specify the warehouse for dashboard creation. If not specified, the first running warehouse will be used. + + ### targets.resources.quality_monitors.data_classification_config The data classification config for the monitor. + #### Attributes -| Key | Type | Description | -|-----------|---------|-----------------------------------------| -| `enabled` | Boolean | Whether data classification is enabled. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - enabled + - Boolean + - Whether data classification is enabled. + + ### targets.resources.quality_monitors.inference_log Configuration for monitoring inference logs. + #### Attributes -| Key | Type | Description | -|------------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `granularities` | Sequence | Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. | -| `label_col` | String | Optional column that contains the ground truth for the prediction. | -| `model_id_col` | String | Column that contains the id of the model generating the predictions. Metrics will be computed per model id by default, and also across all model ids. | -| `prediction_col` | String | Column that contains the output/prediction from the model. | -| `prediction_proba_col` | String | Optional column that contains the prediction probabilities for each class in a classification problem type. The values in this column should be a map, mapping each class label to the prediction probability for a given sample. The map should be of PySpark MapType(). | -| `problem_type` | String | Problem type the model aims to solve. Determines the type of model-quality metrics that will be computed. | -| `timestamp_col` | String | Column that contains the timestamps of requests. The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - granularities + - Sequence + - Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. + + * - label_col + - String + - Optional column that contains the ground truth for the prediction. 
+ + * - model_id_col + - String + - Column that contains the id of the model generating the predictions. Metrics will be computed per model id by default, and also across all model ids. + + * - prediction_col + - String + - Column that contains the output/prediction from the model. + + * - prediction_proba_col + - String + - Optional column that contains the prediction probabilities for each class in a classification problem type. The values in this column should be a map, mapping each class label to the prediction probability for a given sample. The map should be of PySpark MapType(). + + * - problem_type + - String + - Problem type the model aims to solve. Determines the type of model-quality metrics that will be computed. + + * - timestamp_col + - String + - Column that contains the timestamps of requests. The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). + + ### targets.resources.quality_monitors.notifications The notification settings for the monitor. + #### Attributes -| Key | Type | Description | -|--------------------------------------|------|------------------------------------------------------------------------------| -| `on_failure` | Map | Who to send notifications to on monitor failure. | -| `on_new_classification_tag_detected` | Map | Who to send notifications to when new data classification tags are detected. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - on_failure + - Map + - Who to send notifications to on monitor failure. + + * - on_new_classification_tag_detected + - Map + - Who to send notifications to when new data classification tags are detected. + + ### targets.resources.quality_monitors.notifications.on_failure Who to send notifications to on monitor failure. + #### Attributes -| Key | Type | Description | -|-------------------|----------|-------------------------------------------------------------------------------------------------------| -| `email_addresses` | Sequence | The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - email_addresses + - Sequence + - The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. + + ### targets.resources.quality_monitors.notifications.on_new_classification_tag_detected Who to send notifications to when new data classification tags are detected. + #### Attributes -| Key | Type | Description | -|-------------------|----------|-------------------------------------------------------------------------------------------------------| -| `email_addresses` | Sequence | The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - email_addresses + - Sequence + - The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. + + ### targets.resources.quality_monitors.schedule The schedule for automatically updating and refreshing metric tables. 
+ #### Attributes -| Key | Type | Description | -|--------------------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `pause_status` | String | Read only field that indicates whether a schedule is paused or not. | -| `quartz_cron_expression` | String | The expression that determines when to run the monitor. See [examples](https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html). | -| `timezone_id` | String | The timezone id (e.g., ``"PST"``) in which to evaluate the quartz expression. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - pause_status + - String + - Read only field that indicates whether a schedule is paused or not. + + * - quartz_cron_expression + - String + - The expression that determines when to run the monitor. See [examples](https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html). + + * - timezone_id + - String + - The timezone id (e.g., ``"PST"``) in which to evaluate the quartz expression. + + ### targets.resources.quality_monitors.snapshot Configuration for monitoring snapshot tables. + + ### targets.resources.quality_monitors.time_series Configuration for monitoring time series tables. + #### Attributes -| Key | Type | Description | -|-----------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `granularities` | Sequence | Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. | -| `timestamp_col` | String | Column that contains the timestamps of requests. The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - granularities + - Sequence + - Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. + + * - timestamp_col + - String + - Column that contains the timestamps of requests. The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). + + ### targets.resources.registered_models The registered model definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|--------------------|----------|-----------------------------------------------------------------------------------| -| `catalog_name` | String | The name of the catalog where the schema and the registered model reside | -| `comment` | String | The comment attached to the registered model | -| `grants` | Sequence | - | -| `name` | String | The name of the registered model | -| `schema_name` | String | The name of the schema where the registered model resides | -| `storage_location` | String | The storage location on the cloud under which model version data files are stored | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - catalog_name + - String + - The name of the catalog where the schema and the registered model reside + + * - comment + - String + - The comment attached to the registered model + + * - grants + - Sequence + - + + * - name + - String + - The name of the registered model + + * - schema_name + - String + - The name of the schema where the registered model resides + + * - storage_location + - String + - The storage location on the cloud under which model version data files are stored + + ### targets.resources.schemas The schema definitions for the bundle. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|----------------|----------|----------------------------------------------------| -| `catalog_name` | String | Name of parent catalog. | -| `comment` | String | User-provided free-form text description. | -| `grants` | Sequence | - | -| `name` | String | Name of schema, relative to parent catalog. | -| `properties` | Map | - | -| `storage_root` | String | Storage root URL for managed tables within schema. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - catalog_name + - String + - Name of parent catalog. + + * - comment + - String + - User-provided free-form text description. + + * - grants + - Sequence + - + + * - name + - String + - Name of schema, relative to parent catalog. + + * - properties + - Map + - + + * - storage_root + - String + - Storage root URL for managed tables within schema. + + ### targets.resources.schemas.properties + + ### targets.resources.volumes -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|--------------------|----------|-------------------------------------------------------------| -| `catalog_name` | String | The name of the catalog where the schema and the volume are | -| `comment` | String | The comment attached to the volume | -| `grants` | Sequence | - | -| `name` | String | The name of the volume | -| `schema_name` | String | The name of the schema where the volume is | -| `storage_location` | String | The storage location on the cloud | -| `volume_type` | String | - | - + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - catalog_name + - String + - The name of the catalog where the schema and the volume are + + * - comment + - String + - The comment attached to the volume + + * - grants + - Sequence + - + + * - name + - String + - The name of the volume + + * - schema_name + - String + - The name of the schema where the volume is + + * - storage_location + - String + - The storage location on the cloud + + * - volume_type + - String + - + + ### targets.run_as The identity to use to run the bundle. + #### Attributes -| Key | Type | Description | -|--------------------------|--------|--------------------------------------------------------------------------------------------------------------| -| `service_principal_name` | String | Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. | -| `user_name` | String | The email of an active workspace user. Non-admin users can only set this field to their own email. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - service_principal_name + - String + - Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. + + * - user_name + - String + - The email of an active workspace user. Non-admin users can only set this field to their own email. + + ### targets.sync The local paths to sync to the target workspace when a bundle is run or deployed. + #### Attributes -| Key | Type | Description | -|-----------|----------|----------------------------------------------------------------------------------------------------------------------------| -| `exclude` | Sequence | A list of files or folders to exclude from the bundle. | -| `include` | Sequence | A list of files or folders to include in the bundle. | -| `paths` | Sequence | The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - exclude + - Sequence + - A list of files or folders to exclude from the bundle. + + * - include + - Sequence + - A list of files or folders to include in the bundle. + + * - paths + - Sequence + - The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed. + + ### targets.variables The custom variable definitions for the target. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|---------------|--------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `default` | Any | - | -| `description` | String | The description of the variable. | -| `lookup` | Map | The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID. | -| `type` | String | The type of the variable. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - default + - Any + - + + * - description + - String + - The description of the variable. 
+ + * - lookup + - Map + - The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID. + + * - type + - String + - The type of the variable. + + ### targets.variables.lookup The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID. + #### Attributes -| Key | Type | Description | -|----------------------------|--------|-------------| -| `alert` | String | - | -| `cluster` | String | - | -| `cluster_policy` | String | - | -| `dashboard` | String | - | -| `instance_pool` | String | - | -| `job` | String | - | -| `metastore` | String | - | -| `notification_destination` | String | - | -| `pipeline` | String | - | -| `query` | String | - | -| `service_principal` | String | - | -| `warehouse` | String | - | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - alert + - String + - + + * - cluster + - String + - + + * - cluster_policy + - String + - + + * - dashboard + - String + - + + * - instance_pool + - String + - + + * - job + - String + - + + * - metastore + - String + - + + * - notification_destination + - String + - + + * - pipeline + - String + - + + * - query + - String + - + + * - service_principal + - String + - + + * - warehouse + - String + - + + ### targets.workspace The Databricks workspace for the target. + #### Attributes -| Key | Type | Description | -|-------------------------------|---------|--------------------------------------------------------------------------------------| -| `artifact_path` | String | The artifact path to use within the workspace for both deployments and workflow runs | -| `auth_type` | String | The authentication type. | -| `azure_client_id` | String | The Azure client ID | -| `azure_environment` | String | The Azure environment | -| `azure_login_app_id` | String | The Azure login app ID | -| `azure_tenant_id` | String | The Azure tenant ID | -| `azure_use_msi` | Boolean | Whether to use MSI for Azure | -| `azure_workspace_resource_id` | String | The Azure workspace resource ID | -| `client_id` | String | The client ID for the workspace | -| `file_path` | String | The file path to use within the workspace for both deployments and workflow runs | -| `google_service_account` | String | The Google service account name | -| `host` | String | The Databricks workspace host URL | -| `profile` | String | The Databricks workspace profile name | -| `resource_path` | String | The workspace resource path | -| `root_path` | String | The Databricks workspace root path | -| `state_path` | String | The workspace state path | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - artifact_path + - String + - The artifact path to use within the workspace for both deployments and workflow runs + + * - auth_type + - String + - The authentication type. 
+ + * - azure_client_id + - String + - The Azure client ID + + * - azure_environment + - String + - The Azure environment + + * - azure_login_app_id + - String + - The Azure login app ID + + * - azure_tenant_id + - String + - The Azure tenant ID + + * - azure_use_msi + - Boolean + - Whether to use MSI for Azure + + * - azure_workspace_resource_id + - String + - The Azure workspace resource ID + + * - client_id + - String + - The client ID for the workspace + + * - file_path + - String + - The file path to use within the workspace for both deployments and workflow runs + + * - google_service_account + - String + - The Google service account name + + * - host + - String + - The Databricks workspace host URL + + * - profile + - String + - The Databricks workspace profile name + + * - resource_path + - String + - The workspace resource path + + * - root_path + - String + - The Databricks workspace root path + + * - state_path + - String + - The workspace state path + + ## variables A Map that defines the custom variables for the bundle, where each key is the name of the variable, and the value is a Map that defines the variable. -| Key | Type | Description | -|----------|------|----------------------------| -| `` | Map | The definition of the item | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - + - Map + - The definition of the item + Each item has the following attributes: -| Key | Type | Description | -|---------------|--------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `default` | Any | - | -| `description` | String | The description of the variable | -| `lookup` | Map | The name of the `alert`, `cluster_policy`, `cluster`, `dashboard`, `instance_pool`, `job`, `metastore`, `pipeline`, `query`, `service_principal`, or `warehouse` object for which to retrieve an ID." | -| `type` | String | The type of the variable. | - + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - default + - Any + - + + * - description + - String + - The description of the variable + + * - lookup + - Map + - The name of the `alert`, `cluster_policy`, `cluster`, `dashboard`, `instance_pool`, `job`, `metastore`, `pipeline`, `query`, `service_principal`, or `warehouse` object for which to retrieve an ID." + + * - type + - String + - The type of the variable. + + ### variables.lookup The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID. + #### Attributes -| Key | Type | Description | -|----------------------------|--------|-------------| -| `alert` | String | - | -| `cluster` | String | - | -| `cluster_policy` | String | - | -| `dashboard` | String | - | -| `instance_pool` | String | - | -| `job` | String | - | -| `metastore` | String | - | -| `notification_destination` | String | - | -| `pipeline` | String | - | -| `query` | String | - | -| `service_principal` | String | - | -| `warehouse` | String | - | - + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - alert + - String + - + + * - cluster + - String + - + + * - cluster_policy + - String + - + + * - dashboard + - String + - + + * - instance_pool + - String + - + + * - job + - String + - + + * - metastore + - String + - + + * - notification_destination + - String + - + + * - pipeline + - String + - + + * - query + - String + - + + * - service_principal + - String + - + + * - warehouse + - String + - + + ## workspace Defines the Databricks workspace for the bundle. + #### Attributes -| Key | Type | Description | -|-------------------------------|---------|--------------------------------------------------------------------------------------| -| `artifact_path` | String | The artifact path to use within the workspace for both deployments and workflow runs | -| `auth_type` | String | The authentication type. | -| `azure_client_id` | String | The Azure client ID | -| `azure_environment` | String | The Azure environment | -| `azure_login_app_id` | String | The Azure login app ID | -| `azure_tenant_id` | String | The Azure tenant ID | -| `azure_use_msi` | Boolean | Whether to use MSI for Azure | -| `azure_workspace_resource_id` | String | The Azure workspace resource ID | -| `client_id` | String | The client ID for the workspace | -| `file_path` | String | The file path to use within the workspace for both deployments and workflow runs | -| `google_service_account` | String | The Google service account name | -| `host` | String | The Databricks workspace host URL | -| `profile` | String | The Databricks workspace profile name | -| `resource_path` | String | The workspace resource path | -| `root_path` | String | The Databricks workspace root path | -| `state_path` | String | The workspace state path | + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - artifact_path + - String + - The artifact path to use within the workspace for both deployments and workflow runs + + * - auth_type + - String + - The authentication type. 
+ + * - azure_client_id + - String + - The Azure client ID + + * - azure_environment + - String + - The Azure environment + + * - azure_login_app_id + - String + - The Azure login app ID + + * - azure_tenant_id + - String + - The Azure tenant ID + + * - azure_use_msi + - Boolean + - Whether to use MSI for Azure + + * - azure_workspace_resource_id + - String + - The Azure workspace resource ID + + * - client_id + - String + - The client ID for the workspace + + * - file_path + - String + - The file path to use within the workspace for both deployments and workflow runs + + * - google_service_account + - String + - The Google service account name + + * - host + - String + - The Databricks workspace host URL + + * - profile + - String + - The Databricks workspace profile name + + * - resource_path + - String + - The workspace resource path + + * - root_path + - String + - The Databricks workspace root path + + * - state_path + - String + - The workspace state path + \ No newline at end of file From fe4c6b8a680c8b7a1de7fc5634291c536d85d62b Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Wed, 18 Dec 2024 19:59:08 +0100 Subject: [PATCH 03/26] fix: Styling issues --- bundle/internal/docs/docs.go | 14 +-- bundle/internal/docs/docs.md | 220 +++++++++-------------------------- 2 files changed, 60 insertions(+), 174 deletions(-) diff --git a/bundle/internal/docs/docs.go b/bundle/internal/docs/docs.go index 10486a8d2f..5926f3be73 100644 --- a/bundle/internal/docs/docs.go +++ b/bundle/internal/docs/docs.go @@ -34,12 +34,6 @@ type rootProp struct { topLevel bool } -const ( - AdditionalPropertiesMessage = "Each item has the following attributes:" - AdditionalPropertiesAttributeTitle = "" - AdditionalPropertiesAttributeDescription = "The definition of the item" -) - func getNodes(s jsonschema.Schema, refs map[string]jsonschema.Schema, a annotationFile) []rootNode { rootProps := []rootProp{} for k, v := range s.Properties { @@ -72,7 +66,11 @@ func getNodes(s jsonschema.Schema, refs map[string]jsonschema.Schema, a annotati node.ArrayItemAttributes = getAttributes(arrayItemType.Properties, refs) } - nodes = append(nodes, node) + isEmpty := len(node.Attributes) == 0 && len(node.ObjectKeyAttributes) == 0 && len(node.ArrayItemAttributes) == 0 + shouldAddNode := !isEmpty || node.TopLevel + if shouldAddNode { + nodes = append(nodes, node) + } } sort.Slice(nodes, func(i, j int) bool { @@ -101,7 +99,7 @@ func buildMarkdown(nodes []rootNode, outputFile string) error { if len(node.ObjectKeyAttributes) > 0 { m = buildAttributeTable(m, []attributeNode{ - {Title: AdditionalPropertiesAttributeTitle, Type: "Map", Description: AdditionalPropertiesAttributeDescription}, + {Title: fmt.Sprintf("<%s-entry-name>", node.Title), Type: "Map", Description: fmt.Sprintf("Item of the `%s` map", node.Title)}, }) m = m.PlainText("Each item has the following attributes:") m = m.LF() diff --git a/bundle/internal/docs/docs.md b/bundle/internal/docs/docs.md index 86ffa078d0..dceb5a94f9 100644 --- a/bundle/internal/docs/docs.md +++ b/bundle/internal/docs/docs.md @@ -10,9 +10,9 @@ Defines the attributes to build an artifact - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `artifacts` map Each item has the following attributes: @@ -209,10 +209,6 @@ The PyDABs configuration. - The Python virtual environment path -### experimental.scripts -The commands to run - - ## include Specifies a list of path globs that contain configuration files to include within the bundle. 
See [_](/dev-tools/bundles/settings.md#include) @@ -285,10 +281,6 @@ Defines bundle deployment presets. See [_](/dev-tools/bundles/deployment-modes.m - A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED. -### presets.tags -The tags for the bundle deployment. - - ## resources Specifies information about the Databricks resources used by the bundle. See [_](/dev-tools/bundles/resources.md). @@ -358,9 +350,9 @@ The cluster definitions for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `resources.clusters` map Each item has the following attributes: @@ -706,15 +698,6 @@ Cluster iam role is used to access s3, please make sure the cluster iam role in - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. -### resources.clusters.custom_tags -Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS -instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - -- Currently, Databricks allows at most 45 custom tags - -- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags - - ### resources.clusters.docker_image @@ -798,27 +781,6 @@ If not specified at cluster creation, a set of default values will be used. - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default] - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. -### resources.clusters.spark_conf -An object containing a set of optional, user-specified Spark configuration key-value pairs. -Users can also pass in a string of extra JVM options to the driver and the executors via -`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. - - - -### resources.clusters.spark_env_vars -An object containing a set of optional, user-specified environment variable key-value pairs. -Please note that key-value pair of the form (X,Y) will be exported as is (i.e., -`export X='Y'`) while launching the driver and workers. - -In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending -them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all -default databricks managed environmental variables are included as well. - -Example Spark environment variables: -`{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or -`{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` - - ### resources.clusters.workload_type @@ -870,9 +832,9 @@ The dashboard definitions for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `resources.dashboards` map Each item has the following attributes: @@ -948,9 +910,9 @@ The experiment definitions for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `resources.experiments` map Each item has the following attributes: @@ -1006,9 +968,9 @@ The job definitions for the bundle. 
- Type - Description - * - + * - - Map - - The definition of the item + - Item of the `resources.jobs` map Each item has the following attributes: @@ -1305,10 +1267,6 @@ An optional periodic schedule for this job. The default behavior is that the job - A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required. -### resources.jobs.tags -A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. - - ### resources.jobs.trigger A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. @@ -1496,9 +1454,9 @@ The model serving endpoint definitions for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `resources.model_serving_endpoints` map Each item has the following attributes: @@ -1824,9 +1782,9 @@ The model definitions for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `resources.models` map Each item has the following attributes: @@ -1882,9 +1840,9 @@ The pipeline definitions for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `resources.pipelines` map Each item has the following attributes: @@ -1993,10 +1951,6 @@ Each item has the following attributes: - Which pipeline trigger to use. Deprecated: Use `continuous` instead. -### resources.pipelines.configuration -String-String configuration for this pipeline execution. - - ### resources.pipelines.deployment Deployment type of this pipeline. @@ -2205,10 +2159,6 @@ Which pipeline trigger to use. Deprecated: Use `continuous` instead. - -### resources.pipelines.trigger.manual - - - ### resources.quality_monitors The quality monitor definitions for the bundle. @@ -2220,9 +2170,9 @@ The quality monitor definitions for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `resources.quality_monitors` map Each item has the following attributes: @@ -2435,10 +2385,6 @@ The schedule for automatically updating and refreshing metric tables. - The timezone id (e.g., ``"PST"``) in which to evaluate the quartz expression. -### resources.quality_monitors.snapshot -Configuration for monitoring snapshot tables. - - ### resources.quality_monitors.time_series Configuration for monitoring time series tables. @@ -2472,9 +2418,9 @@ The registered model definitions for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `resources.registered_models` map Each item has the following attributes: @@ -2522,9 +2468,9 @@ The schema definitions for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `resources.schemas` map Each item has the following attributes: @@ -2561,10 +2507,6 @@ Each item has the following attributes: - Storage root URL for managed tables within schema. 
-### resources.schemas.properties - - - ### resources.volumes @@ -2576,9 +2518,9 @@ Each item has the following attributes: - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `resources.volumes` map Each item has the following attributes: @@ -2678,9 +2620,9 @@ Defines deployment targets for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `targets` map Each item has the following attributes: @@ -2760,9 +2702,9 @@ The artifacts to include in the target deployment. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `targets.artifacts` map Each item has the following attributes: @@ -2963,10 +2905,6 @@ The deployment presets for the target. - A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED. -### targets.presets.tags -The tags for the bundle deployment. - - ### targets.resources The resource definitions for the target. @@ -3036,9 +2974,9 @@ The cluster definitions for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `targets.resources.clusters` map Each item has the following attributes: @@ -3384,15 +3322,6 @@ Cluster iam role is used to access s3, please make sure the cluster iam role in - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. -### targets.resources.clusters.custom_tags -Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS -instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - -- Currently, Databricks allows at most 45 custom tags - -- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags - - ### targets.resources.clusters.docker_image @@ -3476,27 +3405,6 @@ If not specified at cluster creation, a set of default values will be used. - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default] - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. -### targets.resources.clusters.spark_conf -An object containing a set of optional, user-specified Spark configuration key-value pairs. -Users can also pass in a string of extra JVM options to the driver and the executors via -`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. - - - -### targets.resources.clusters.spark_env_vars -An object containing a set of optional, user-specified environment variable key-value pairs. -Please note that key-value pair of the form (X,Y) will be exported as is (i.e., -`export X='Y'`) while launching the driver and workers. - -In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending -them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all -default databricks managed environmental variables are included as well. 
- -Example Spark environment variables: -`{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or -`{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` - - ### targets.resources.clusters.workload_type @@ -3548,9 +3456,9 @@ The dashboard definitions for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `targets.resources.dashboards` map Each item has the following attributes: @@ -3626,9 +3534,9 @@ The experiment definitions for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `targets.resources.experiments` map Each item has the following attributes: @@ -3684,9 +3592,9 @@ The job definitions for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `targets.resources.jobs` map Each item has the following attributes: @@ -3983,10 +3891,6 @@ An optional periodic schedule for this job. The default behavior is that the job - A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required. -### targets.resources.jobs.tags -A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. - - ### targets.resources.jobs.trigger A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. @@ -4174,9 +4078,9 @@ The model serving endpoint definitions for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `targets.resources.model_serving_endpoints` map Each item has the following attributes: @@ -4502,9 +4406,9 @@ The model definitions for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `targets.resources.models` map Each item has the following attributes: @@ -4560,9 +4464,9 @@ The pipeline definitions for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `targets.resources.pipelines` map Each item has the following attributes: @@ -4671,10 +4575,6 @@ Each item has the following attributes: - Which pipeline trigger to use. Deprecated: Use `continuous` instead. -### targets.resources.pipelines.configuration -String-String configuration for this pipeline execution. - - ### targets.resources.pipelines.deployment Deployment type of this pipeline. @@ -4883,10 +4783,6 @@ Which pipeline trigger to use. Deprecated: Use `continuous` instead. - -### targets.resources.pipelines.trigger.manual - - - ### targets.resources.quality_monitors The quality monitor definitions for the bundle. @@ -4898,9 +4794,9 @@ The quality monitor definitions for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `targets.resources.quality_monitors` map Each item has the following attributes: @@ -5113,10 +5009,6 @@ The schedule for automatically updating and refreshing metric tables. - The timezone id (e.g., ``"PST"``) in which to evaluate the quartz expression. -### targets.resources.quality_monitors.snapshot -Configuration for monitoring snapshot tables. 
- - ### targets.resources.quality_monitors.time_series Configuration for monitoring time series tables. @@ -5150,9 +5042,9 @@ The registered model definitions for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `targets.resources.registered_models` map Each item has the following attributes: @@ -5200,9 +5092,9 @@ The schema definitions for the bundle. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `targets.resources.schemas` map Each item has the following attributes: @@ -5239,10 +5131,6 @@ Each item has the following attributes: - Storage root URL for managed tables within schema. -### targets.resources.schemas.properties - - - ### targets.resources.volumes @@ -5254,9 +5142,9 @@ Each item has the following attributes: - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `targets.resources.volumes` map Each item has the following attributes: @@ -5356,9 +5244,9 @@ The custom variable definitions for the target. - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `targets.variables` map Each item has the following attributes: @@ -5538,9 +5426,9 @@ A Map that defines the custom variables for the bundle, where each key is the na - Type - Description - * - + * - - Map - - The definition of the item + - Item of the `variables` map Each item has the following attributes: From cfa2be3c35d12dbb17419e8a798863a494e05cc0 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Wed, 18 Dec 2024 20:53:38 +0100 Subject: [PATCH 04/26] fix: Remove openAPI docs --- bundle/internal/docs/docs.go | 33 +- bundle/internal/docs/docs.md | 4938 ++-------------------------------- bundle/internal/docs/main.go | 11 +- 3 files changed, 189 insertions(+), 4793 deletions(-) diff --git a/bundle/internal/docs/docs.go b/bundle/internal/docs/docs.go index 5926f3be73..289874a377 100644 --- a/bundle/internal/docs/docs.go +++ b/bundle/internal/docs/docs.go @@ -34,7 +34,7 @@ type rootProp struct { topLevel bool } -func getNodes(s jsonschema.Schema, refs map[string]jsonschema.Schema, a annotationFile) []rootNode { +func getNodes(s jsonschema.Schema, refs map[string]jsonschema.Schema, customFields map[string]bool) []rootNode { rootProps := []rootProp{} for k, v := range s.Properties { rootProps = append(rootProps, rootProp{k, v, true}) @@ -42,23 +42,24 @@ func getNodes(s jsonschema.Schema, refs map[string]jsonschema.Schema, a annotati nodes := make([]rootNode, 0, len(rootProps)) for i := 0; i < len(rootProps); i++ { - k := rootProps[i].k - v := rootProps[i].v + item := rootProps[i] + k := item.k + v := item.v v = resolveRefs(v, refs) node := rootNode{ Title: k, - Description: getDescription(v), - TopLevel: rootProps[i].topLevel, + Description: getDescription(v, item.topLevel), + TopLevel: item.topLevel, } node.Attributes = getAttributes(v.Properties, refs) - rootProps = append(rootProps, extractNodes(k, v.Properties, refs, a)...) + rootProps = append(rootProps, extractNodes(k, v.Properties, refs, customFields)...) additionalProps, ok := v.AdditionalProperties.(*jsonschema.Schema) if ok { objectKeyType := resolveRefs(additionalProps, refs) node.ObjectKeyAttributes = getAttributes(objectKeyType.Properties, refs) - rootProps = append(rootProps, extractNodes(k, objectKeyType.Properties, refs, a)...) + rootProps = append(rootProps, extractNodes(k, objectKeyType.Properties, refs, customFields)...) 
} if v.Items != nil { @@ -181,7 +182,7 @@ func getAttributes(props map[string]*jsonschema.Schema, refs map[string]jsonsche attributes = append(attributes, attributeNode{ Title: k, Type: typeString, - Description: getDescription(v), + Description: getDescription(v, true), }) } sort.Slice(attributes, func(i, j int) bool { @@ -190,8 +191,8 @@ func getAttributes(props map[string]*jsonschema.Schema, refs map[string]jsonsche return attributes } -func getDescription(s *jsonschema.Schema) string { - if s.MarkdownDescription != "" { +func getDescription(s *jsonschema.Schema, allowMarkdown bool) string { + if allowMarkdown && s.MarkdownDescription != "" { return s.MarkdownDescription } return s.Description @@ -226,14 +227,22 @@ func resolveRefs(s *jsonschema.Schema, schemas map[string]jsonschema.Schema) *js return node } -func extractNodes(prefix string, props map[string]*jsonschema.Schema, refs map[string]jsonschema.Schema, a annotationFile) []rootProp { +func shouldExtract(ref string, customFields map[string]bool) bool { + refKey := strings.TrimPrefix(ref, "#/$defs/") + _, isCustomField := customFields[refKey] + return isCustomField +} + +func extractNodes(prefix string, props map[string]*jsonschema.Schema, refs map[string]jsonschema.Schema, customFields map[string]bool) []rootProp { nodes := []rootProp{} for k, v := range props { + if !shouldExtract(*v.Reference, customFields) { + continue + } v = resolveRefs(v, refs) if v.Type == "object" { nodes = append(nodes, rootProp{prefix + "." + k, v, false}) } - v.MarkdownDescription = "" } return nodes } diff --git a/bundle/internal/docs/docs.md b/bundle/internal/docs/docs.md index dceb5a94f9..6cfbcc1319 100644 --- a/bundle/internal/docs/docs.md +++ b/bundle/internal/docs/docs.md @@ -339,22 +339,10 @@ Specifies information about the Databricks resources used by the bundle. See [_] - -### resources.clusters -The cluster definitions for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `resources.clusters` map +## run_as +The identity to use to run the bundle. -Each item has the following attributes: +#### Attributes .. list-table:: @@ -364,124 +352,43 @@ Each item has the following attributes: - Type - Description - * - apply_policy_default_values - - Boolean - - When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. - - * - autoscale - - Map - - Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. - - * - autotermination_minutes - - Integer - - Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. - - * - aws_attributes - - Map - - Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. - - * - azure_attributes - - Map - - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. - - * - cluster_log_conf - - Map - - The configuration for delivering spark logs to a long-term storage destination. Two kinds of destinations (dbfs and s3) are supported. 
Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. - - * - cluster_name + * - service_principal_name - String - - Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. - - * - custom_tags - - Map - - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + - - * - data_security_mode + * - user_name - String - - * - docker_image - - Map - - - * - driver_instance_pool_id - - String - - The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. +## sync +The files and file paths to include or exclude in the bundle. See [_](/dev-tools/bundles/) - * - driver_node_type_id - - String - - The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. +#### Attributes - * - enable_elastic_disk - - Boolean - - Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to the User Guide for more details. - * - enable_local_disk_encryption - - Boolean - - Whether to enable LUKS on cluster VMs' local disks +.. list-table:: + :header-rows: 1 - * - gcp_attributes - - Map - - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. + * - Key + - Type + - Description - * - init_scripts + * - exclude - Sequence - - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. - - * - instance_pool_id - - String - - The optional ID of the instance pool to which the cluster belongs. - - * - node_type_id - - String - - This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. - - * - num_workers - - Integer - - Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. + - A list of files or folders to exclude from the bundle. 
- * - permissions + * - include - Sequence - - - - * - policy_id - - String - - The ID of the cluster policy used to create the cluster if applicable. - - * - runtime_engine - - String - - - - * - single_user_name - - String - - Single user name if data_security_mode is `SINGLE_USER` - - * - spark_conf - - Map - - An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. - - * - spark_env_vars - - Map - - An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` - - * - spark_version - - String - - The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. + - A list of files or folders to include in the bundle. - * - ssh_public_keys + * - paths - Sequence - - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. - - * - workload_type - - Map - - - + - The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed. -### resources.clusters.autoscale -Parameters needed in order to automatically scale clusters up and down based on load. -Note: autoscaling works best with DB runtime versions 3.0 or later. -#### Attributes +## targets +Defines deployment targets for the bundle. .. list-table:: @@ -491,20 +398,11 @@ Note: autoscaling works best with DB runtime versions 3.0 or later. - Type - Description - * - max_workers - - Integer - - The maximum number of workers to which the cluster can scale up when overloaded. Note that `max_workers` must be strictly greater than `min_workers`. - - * - min_workers - - Integer - - The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. - - -### resources.clusters.aws_attributes -Attributes related to clusters running on Amazon Web Services. -If not specified at cluster creation, a set of default values will be used. + * - + - Map + - Item of the `targets` map -#### Attributes +Each item has the following attributes: .. list-table:: @@ -514,80 +412,65 @@ If not specified at cluster creation, a set of default values will be used. - Type - Description - * - availability - - String - - - - * - ebs_volume_count - - Integer - - The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. 
For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogenously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. - - * - ebs_volume_iops - - Integer - - If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. - - * - ebs_volume_size - - Integer - - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. - - * - ebs_volume_throughput - - Integer - - If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. - - * - ebs_volume_type - - String - - + * - artifacts + - Map + - The artifacts to include in the target deployment. See [_](#artifact) - * - first_on_demand - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + * - bundle + - Map + - The name of the bundle when deploying to this target. - * - instance_profile_arn + * - cluster_id - String - - Nodes for this cluster will only be placed on AWS instances with this instance profile. If ommitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans. If this field is ommitted, we will pull in the default from the conf if it exists. - - * - spot_bid_price_percent - - Integer - - The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. The default value and documentation here should be kept consistent with CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. 
+ - The ID of the cluster to use for this target. - * - zone_id + * - compute_id - String - - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. + - Deprecated. The ID of the compute to use for this target. + * - default + - Boolean + - Whether this target is the default target. -### resources.clusters.azure_attributes -Attributes related to clusters running on Microsoft Azure. -If not specified at cluster creation, a set of default values will be used. + * - git + - Map + - The Git version control settings for the target. See [_](#git). -#### Attributes + * - mode + - String + - The deployment mode for the target. Valid values are `development` or `production`. See [_](/dev-tools/bundles/deployment-modes.md). + * - permissions + - Sequence + - The permissions for deploying and running the bundle in the target. See [_](#permission). -.. list-table:: - :header-rows: 1 + * - presets + - Map + - The deployment presets for the target. See [_](#preset). - * - Key - - Type - - Description + * - resources + - Map + - The resource definitions for the target. See [_](#resources). - * - availability - - String - - + * - run_as + - Map + - The identity to use to run the bundle. See [_](#job_run_as) and [_](/dev-tools/bundles/run_as.md). - * - first_on_demand - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + * - sync + - Map + - The local paths to sync to the target workspace when a bundle is run or deployed. See [_](#sync). - * - log_analytics_info + * - variables - Map - - Defines values necessary to configure and run Azure Log Analytics agent + - The custom variable definitions for the target. See [_](/dev-tools/bundles/settings.md#variables) and [_](/dev-tools/bundles/variables.md). - * - spot_bid_max_price - - Any - - The max bid price to be used for Azure spot instances. The Max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should > 0 or -1. + * - workspace + - Map + - The Databricks workspace for the target. 
[_](#workspace) -### resources.clusters.azure_attributes.log_analytics_info -Defines values necessary to configure and run Azure Log Analytics agent +### targets.bundle +The name of the bundle when deploying to this target. #### Attributes @@ -599,44 +482,37 @@ Defines values necessary to configure and run Azure Log Analytics agent - Type - Description - * - log_analytics_primary_key + * - cluster_id - String - - + - The ID of a cluster to use to run the bundle. See [_](/dev-tools/bundles/settings.md#cluster_id). - * - log_analytics_workspace_id + * - compute_id - String - - - - -### resources.clusters.cluster_log_conf -The configuration for delivering spark logs to a long-term storage destination. -Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified -for one cluster. If the conf is given, the logs will be delivered to the destination every -`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while -the destination of executor logs is `$destination/$clusterId/executor`. - -#### Attributes - - -.. list-table:: - :header-rows: 1 + - - * - Key - - Type - - Description + * - databricks_cli_version + - String + - The Databricks CLI version to use for the bundle. See [_](/dev-tools/bundles/settings.md#databricks_cli_version). - * - dbfs + * - deployment - Map - - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + - The definition of the bundle deployment. For supported attributes, see [_](#deployment) and [_](/dev-tools/bundles/deployment-modes.md). - * - s3 + * - git - Map - - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. + - The Git version control details that are associated with your bundle. For supported attributes, see [_](#git) and [_](/dev-tools/bundles/settings.md#git). + + * - name + - String + - The name of the bundle. + + * - uuid + - String + - -### resources.clusters.cluster_log_conf.dbfs -destination needs to be provided. e.g. -`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` +### targets.bundle.deployment +The definition of the bundle deployment #### Attributes @@ -648,16 +524,17 @@ destination needs to be provided. e.g. - Type - Description - * - destination - - String - - dbfs destination, e.g. `dbfs:/my/path` + * - fail_on_active_runs + - Boolean + - Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. + + * - lock + - Map + - The deployment lock attributes. See [_](#lock). -### resources.clusters.cluster_log_conf.s3 -destination and either the region or endpoint need to be provided. e.g. -`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` -Cluster iam role is used to access s3, please make sure the cluster iam role in -`instance_profile_arn` has permission to write data to the s3 destination. +### targets.bundle.deployment.lock +The deployment lock attributes. #### Attributes @@ -669,37 +546,17 @@ Cluster iam role is used to access s3, please make sure the cluster iam role in - Type - Description - * - canned_acl - - String - - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. 
If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. - - * - destination - - String - - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. - - * - enable_encryption + * - enabled - Boolean - - (Optional) Flag to enable server side encryption, `false` by default. - - * - encryption_type - - String - - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. - - * - endpoint - - String - - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. - - * - kms_key - - String - - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + - Whether this lock is enabled. - * - region - - String - - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + * - force + - Boolean + - Whether to force this lock if it is enabled. -### resources.clusters.docker_image - +### targets.bundle.git +The Git version control details that are associated with your bundle. #### Attributes @@ -711,17 +568,17 @@ Cluster iam role is used to access s3, please make sure the cluster iam role in - Type - Description - * - basic_auth - - Map - - + * - branch + - String + - The Git branch name. See [_](/dev-tools/bundles/settings.md#git). - * - url + * - origin_url - String - - URL of the docker image. + - The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git). -### resources.clusters.docker_image.basic_auth - +### targets.git +The Git version control settings for the target. #### Attributes @@ -733,18 +590,17 @@ Cluster iam role is used to access s3, please make sure the cluster iam role in - Type - Description - * - password + * - branch - String - - Password of the user + - The Git branch name. See [_](/dev-tools/bundles/settings.md#git). - * - username + * - origin_url - String - - Name of the user + - The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git). -### resources.clusters.gcp_attributes -Attributes related to clusters running on Google Cloud Platform. -If not specified at cluster creation, a set of default values will be used. +### targets.presets +The deployment presets for the target. #### Attributes @@ -756,33 +612,33 @@ If not specified at cluster creation, a set of default values will be used. - Type - Description - * - availability - - String - - - - * - boot_disk_size + * - jobs_max_concurrent_runs - Integer - - boot disk size in GB + - The maximum concurrent runs for a job. - * - google_service_account + * - name_prefix - String - - If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). 
The google service account must have previously been added to the Databricks environment by an account administrator. + - The prefix for job runs of the bundle. - * - local_ssd_count - - Integer - - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. + * - pipelines_development + - Boolean + - Whether pipeline deployments should be locked in development mode. - * - use_preemptible_executors + * - source_linked_deployment - Boolean - - This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default). Note: Soon to be deprecated, use the availability field instead. + - Whether to link the deployment to the bundle source. + + * - tags + - Map + - The tags for the bundle deployment. - * - zone_id + * - trigger_pause_status - String - - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default] - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. + - A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED. -### resources.clusters.workload_type - +### targets.resources +The resource definitions for the target. #### Attributes @@ -794,4419 +650,51 @@ If not specified at cluster creation, a set of default values will be used. - Type - Description - * - clients + * - clusters - Map - - defined what type of clients can use the cluster. E.g. Notebooks, Jobs + - The cluster definitions for the bundle. See [_](/dev-tools/bundles/resources.md#cluster) - -### resources.clusters.workload_type.clients - defined what type of clients can use the cluster. E.g. Notebooks, Jobs - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - jobs - - Boolean - - With jobs set, the cluster can be used for jobs - - * - notebooks - - Boolean - - With notebooks set, this cluster can be used for notebooks - - -### resources.dashboards -The dashboard definitions for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `resources.dashboards` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - create_time - - String - - The timestamp of when the dashboard was created. - - * - dashboard_id - - String - - UUID identifying the dashboard. - - * - display_name - - String - - The display name of the dashboard. - - * - embed_credentials - - Boolean - - - - * - etag - - String - - The etag for the dashboard. Can be optionally provided on updates to ensure that the dashboard has not been modified since the last read. This field is excluded in List Dashboards responses. - - * - file_path - - String - - - - * - lifecycle_state - - String - - The state of the dashboard resource. Used for tracking trashed status. - - * - parent_path - - String - - The workspace path of the folder containing the dashboard. 
Includes leading slash and no trailing slash. This field is excluded in List Dashboards responses. - - * - path - - String - - The workspace path of the dashboard asset, including the file name. Exported dashboards always have the file extension `.lvdash.json`. This field is excluded in List Dashboards responses. - - * - permissions - - Sequence - - - - * - serialized_dashboard - - Any - - The contents of the dashboard in serialized string form. This field is excluded in List Dashboards responses. Use the [get dashboard API](https://docs.databricks.com/api/workspace/lakeview/get) to retrieve an example response, which includes the `serialized_dashboard` field. This field provides the structure of the JSON string that represents the dashboard's layout and components. - - * - update_time - - String - - The timestamp of when the dashboard was last updated by the user. This field is excluded in List Dashboards responses. - - * - warehouse_id - - String - - The warehouse ID used to run the dashboard. - - -### resources.experiments -The experiment definitions for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `resources.experiments` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - artifact_location - - String - - Location where artifacts for the experiment are stored. - - * - creation_time - - Integer - - Creation time - - * - experiment_id - - String - - Unique identifier for the experiment. - - * - last_update_time - - Integer - - Last update time - - * - lifecycle_stage - - String - - Current life cycle stage of the experiment: "active" or "deleted". Deleted experiments are not returned by APIs. - - * - name - - String - - Human readable name that identifies the experiment. - - * - permissions - - Sequence - - - - * - tags - - Sequence - - Tags: Additional metadata key-value pairs. - - -### resources.jobs -The job definitions for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `resources.jobs` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - budget_policy_id - - String - - The id of the user specified budget policy to use for this job. If not specified, a default budget policy may be applied when creating or modifying the job. See `effective_budget_policy_id` for the budget policy used by this workload. - - * - continuous - - Map - - An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. - - * - description - - String - - An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding. - - * - email_notifications - - Map - - An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. - - * - environments - - Sequence - - A list of task execution environment specifications that can be referenced by serverless tasks of this job. An environment is required to be present for serverless tasks. For serverless notebook tasks, the environment is accessible in the notebook environment panel. For other serverless tasks, the task environment is required to be specified using environment_key in the task settings. 
- - * - git_source - - Map - - An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. - - * - health - - Map - - - - * - job_clusters - - Sequence - - A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. - - * - max_concurrent_runs - - Integer - - An optional maximum allowed number of concurrent runs of the job. Set this value if you want to be able to execute multiple runs of the same job concurrently. This is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters. This setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs. However, from then on, new runs are skipped unless there are fewer than 3 active runs. This value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped. - - * - name - - String - - An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding. - - * - notification_settings - - Map - - Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. - - * - parameters - - Sequence - - Job-level parameter definitions - - * - permissions - - Sequence - - - - * - queue - - Map - - The queue settings of the job. - - * - run_as - - Map - - - - * - schedule - - Map - - An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. - - * - tags - - Map - - A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. - - * - tasks - - Sequence - - A list of task specifications to be executed by this job. - - * - timeout_seconds - - Integer - - An optional timeout applied to each run of this job. A value of `0` means no timeout. - - * - trigger - - Map - - A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. - - * - webhook_notifications - - Map - - A collection of system notification IDs to notify when runs of this job begin or complete. - - -### resources.jobs.continuous -An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - pause_status - - String - - Indicate whether the continuous execution of the job is paused or not. 
Defaults to UNPAUSED. - - -### resources.jobs.email_notifications -An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - no_alert_for_skipped_runs - - Boolean - - If true, do not send email to recipients specified in `on_failure` if the run is skipped. This field is `deprecated`. Please use the `notification_settings.no_alert_for_skipped_runs` field. - - * - on_duration_warning_threshold_exceeded - - Sequence - - A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. - - * - on_failure - - Sequence - - A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. - - * - on_start - - Sequence - - A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. - - * - on_streaming_backlog_exceeded - - Sequence - - A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. - - * - on_success - - Sequence - - A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. - - -### resources.jobs.git_source -An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. - -If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. - -Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - git_branch - - String - - Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit. - - * - git_commit - - String - - Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag. - - * - git_provider - - String - - Unique identifier of the service used to host the Git repository. The value is case insensitive. - - * - git_tag - - String - - Name of the tag to be checked out and used by this job. 
This field cannot be specified in conjunction with git_branch or git_commit. - - * - git_url - - String - - URL of the repository to be cloned by this job. - - -### resources.jobs.health - - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - rules - - Sequence - - - - -### resources.jobs.notification_settings -Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - no_alert_for_canceled_runs - - Boolean - - If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. - - * - no_alert_for_skipped_runs - - Boolean - - If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. - - -### resources.jobs.queue -The queue settings of the job. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - enabled - - Boolean - - If true, enable queueing for the job. This is a required field. - - -### resources.jobs.run_as - - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - service_principal_name - - String - - Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. - - * - user_name - - String - - The email of an active workspace user. Non-admin users can only set this field to their own email. - - -### resources.jobs.schedule -An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - pause_status - - String - - Indicate whether this schedule is paused or not. - - * - quartz_cron_expression - - String - - A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required. - - * - timezone_id - - String - - A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required. - - -### resources.jobs.trigger -A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - file_arrival - - Map - - File arrival trigger settings. - - * - pause_status - - String - - Whether this trigger is paused or not. - - * - periodic - - Map - - Periodic trigger settings. - - * - table - - Map - - Old table trigger settings name. Deprecated in favor of `table_update`. - - * - table_update - - Map - - - - -### resources.jobs.trigger.file_arrival -File arrival trigger settings. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - min_time_between_triggers_seconds - - Integer - - If set, the trigger starts a run only after the specified amount of time passed since the last time the trigger fired. 
The minimum allowed value is 60 seconds - - * - url - - String - - URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location. - - * - wait_after_last_change_seconds - - Integer - - If set, the trigger starts a run only after no file activity has occurred for the specified amount of time. This makes it possible to wait for a batch of incoming files to arrive before triggering a run. The minimum allowed value is 60 seconds. - - -### resources.jobs.trigger.periodic -Periodic trigger settings. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - interval - - Integer - - The interval at which the trigger should run. - - * - unit - - String - - The unit of time for the interval. - - -### resources.jobs.trigger.table -Old table trigger settings name. Deprecated in favor of `table_update`. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - condition - - String - - The table(s) condition based on which to trigger a job run. - - * - min_time_between_triggers_seconds - - Integer - - If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. - - * - table_names - - Sequence - - A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. - - * - wait_after_last_change_seconds - - Integer - - If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. The minimum allowed value is 60 seconds. - - -### resources.jobs.trigger.table_update - - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - condition - - String - - The table(s) condition based on which to trigger a job run. - - * - min_time_between_triggers_seconds - - Integer - - If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. - - * - table_names - - Sequence - - A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. - - * - wait_after_last_change_seconds - - Integer - - If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. The minimum allowed value is 60 seconds. - - -### resources.jobs.webhook_notifications -A collection of system notification IDs to notify when runs of this job begin or complete. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - on_duration_warning_threshold_exceeded - - Sequence - - An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. - - * - on_failure - - Sequence - - An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. - - * - on_start - - Sequence - - An optional list of system notification IDs to call when the run starts. 
A maximum of 3 destinations can be specified for the `on_start` property. - - * - on_streaming_backlog_exceeded - - Sequence - - An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. - - * - on_success - - Sequence - - An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. - - -### resources.model_serving_endpoints -The model serving endpoint definitions for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `resources.model_serving_endpoints` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - ai_gateway - - Map - - The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are supported as of now. - - * - config - - Map - - The core config of the serving endpoint. - - * - name - - String - - The name of the serving endpoint. This field is required and must be unique across a Databricks workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. - - * - permissions - - Sequence - - - - * - rate_limits - - Sequence - - Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI Gateway to manage rate limits. - - * - route_optimized - - Boolean - - Enable route optimization for the serving endpoint. - - * - tags - - Sequence - - Tags to be attached to the serving endpoint and automatically propagated to billing logs. - - -### resources.model_serving_endpoints.ai_gateway -The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are supported as of now. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - guardrails - - Map - - Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. - - * - inference_table_config - - Map - - Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. - - * - rate_limits - - Sequence - - Configuration for rate limits which can be set to limit endpoint traffic. - - * - usage_tracking_config - - Map - - Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. - - -### resources.model_serving_endpoints.ai_gateway.guardrails -Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - input - - Map - - Configuration for input guardrail filters. - - * - output - - Map - - Configuration for output guardrail filters. - - -### resources.model_serving_endpoints.ai_gateway.guardrails.input -Configuration for input guardrail filters. - -#### Attributes - - -.. 
list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - invalid_keywords - - Sequence - - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. - - * - pii - - Map - - Configuration for guardrail PII filter. - - * - safety - - Boolean - - Indicates whether the safety filter is enabled. - - * - valid_topics - - Sequence - - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. - - -### resources.model_serving_endpoints.ai_gateway.guardrails.input.pii -Configuration for guardrail PII filter. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - behavior - - String - - Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. - - -### resources.model_serving_endpoints.ai_gateway.guardrails.output -Configuration for output guardrail filters. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - invalid_keywords - - Sequence - - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. - - * - pii - - Map - - Configuration for guardrail PII filter. - - * - safety - - Boolean - - Indicates whether the safety filter is enabled. - - * - valid_topics - - Sequence - - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. - - -### resources.model_serving_endpoints.ai_gateway.guardrails.output.pii -Configuration for guardrail PII filter. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - behavior - - String - - Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. - - -### resources.model_serving_endpoints.ai_gateway.inference_table_config -Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - catalog_name - - String - - The name of the catalog in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the catalog name. - - * - enabled - - Boolean - - Indicates whether the inference table is enabled. - - * - schema_name - - String - - The name of the schema in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the schema name. - - * - table_name_prefix - - String - - The prefix of the table in Unity Catalog. NOTE: On update, you have to disable inference table first in order to change the prefix name. 
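Read together, the `ai_gateway` attributes above describe optional request logging and usage tracking for a serving endpoint. A minimal, hypothetical sketch follows; the endpoint, catalog, schema, and model names are placeholders, and the served-entity fields are assumed from the serving endpoint schema rather than shown in the tables above:

```yaml
# Hypothetical model serving endpoint with AI Gateway inference tables
# and usage tracking enabled; all names are placeholders.
resources:
  model_serving_endpoints:
    my_endpoint:
      name: my-endpoint
      config:
        served_entities:
          - entity_name: my_catalog.my_schema.my_model   # assumed field names
            entity_version: "1"
            workload_size: Small
            scale_to_zero_enabled: true
      ai_gateway:
        inference_table_config:
          enabled: true
          catalog_name: my_catalog
          schema_name: monitoring
          table_name_prefix: my_endpoint
        usage_tracking_config:
          enabled: true
```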
- - -### resources.model_serving_endpoints.ai_gateway.usage_tracking_config -Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - enabled - - Boolean - - Whether to enable usage tracking. - - -### resources.model_serving_endpoints.config -The core config of the serving endpoint. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - auto_capture_config - - Map - - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. - - * - served_entities - - Sequence - - A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served entities. - - * - served_models - - Sequence - - (Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A serving endpoint can have up to 15 served models. - - * - traffic_config - - Map - - The traffic config defining how invocations to the serving endpoint should be routed. - - -### resources.model_serving_endpoints.config.auto_capture_config -Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - catalog_name - - String - - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled. - - * - enabled - - Boolean - - Indicates whether the inference table is enabled. - - * - schema_name - - String - - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if the inference table is already enabled. - - * - table_name_prefix - - String - - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled. - - -### resources.model_serving_endpoints.config.traffic_config -The traffic config defining how invocations to the serving endpoint should be routed. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - routes - - Sequence - - The list of routes that define traffic to each served entity. - - -### resources.models -The model definitions for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `resources.models` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - creation_timestamp - - Integer - - Timestamp recorded when this `registered_model` was created. - - * - description - - String - - Description of this `registered_model`. - - * - last_updated_timestamp - - Integer - - Timestamp recorded when metadata for this `registered_model` was last updated. - - * - latest_versions - - Sequence - - Collection of latest model versions for each stage. Only contains models with current `READY` status. - - * - name - - String - - Unique name for the model. - - * - permissions - - Sequence - - - - * - tags - - Sequence - - Tags: Additional metadata key-value pairs for this `registered_model`. - - * - user_id - - String - - User that created this `registered_model` - - -### resources.pipelines -The pipeline definitions for the bundle. - - -.. 
list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `resources.pipelines` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - budget_policy_id - - String - - Budget policy of this pipeline. - - * - catalog - - String - - A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. - - * - channel - - String - - DLT Release Channel that specifies which version to use. - - * - clusters - - Sequence - - Cluster settings for this pipeline deployment. - - * - configuration - - Map - - String-String configuration for this pipeline execution. - - * - continuous - - Boolean - - Whether the pipeline is continuous or triggered. This replaces `trigger`. - - * - deployment - - Map - - Deployment type of this pipeline. - - * - development - - Boolean - - Whether the pipeline is in Development mode. Defaults to false. - - * - edition - - String - - Pipeline product edition. - - * - filters - - Map - - Filters on which Pipeline packages to include in the deployed graph. - - * - gateway_definition - - Map - - The definition of a gateway pipeline to support change data capture. - - * - id - - String - - Unique identifier for this pipeline. - - * - ingestion_definition - - Map - - The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings. - - * - libraries - - Sequence - - Libraries or code needed by this deployment. - - * - name - - String - - Friendly identifier for this pipeline. - - * - notifications - - Sequence - - List of notification settings for this pipeline. - - * - permissions - - Sequence - - - - * - photon - - Boolean - - Whether Photon is enabled for this pipeline. - - * - restart_window - - Map - - Restart window of this pipeline. - - * - schema - - String - - The default schema (database) where tables are read from or published to. The presence of this field implies that the pipeline is in direct publishing mode. - - * - serverless - - Boolean - - Whether serverless compute is enabled for this pipeline. - - * - storage - - String - - DBFS root directory for storing checkpoints and tables. - - * - target - - String - - Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. - - * - trigger - - Map - - Which pipeline trigger to use. Deprecated: Use `continuous` instead. - - -### resources.pipelines.deployment -Deployment type of this pipeline. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - kind - - String - - The deployment method that manages the pipeline. - - * - metadata_file_path - - String - - The path to the file containing metadata about the deployment. - - -### resources.pipelines.filters -Filters on which Pipeline packages to include in the deployed graph. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - exclude - - Sequence - - Paths to exclude. - - * - include - - Sequence - - Paths to include. - - -### resources.pipelines.gateway_definition -The definition of a gateway pipeline to support change data capture. 
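For example, a gateway pipeline might be declared as in the following sketch; the connection and storage names are placeholders:

```yaml
# Illustrative sketch only: connection and storage names are placeholders.
resources:
  pipelines:
    my_gateway_pipeline:
      name: my-gateway-pipeline
      gateway_definition:
        connection_name: my_sqlserver_connection
        gateway_storage_catalog: main
        gateway_storage_schema: ingest_staging
        gateway_storage_name: my_gateway_storage
```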
- -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - connection_id - - String - - [Deprecated, use connection_name instead] Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. - - * - connection_name - - String - - Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. - - * - gateway_storage_catalog - - String - - Required, Immutable. The name of the catalog for the gateway pipeline's storage location. - - * - gateway_storage_name - - String - - Optional. The Unity Catalog-compatible name for the gateway storage location. This is the destination to use for the data that is extracted by the gateway. Delta Live Tables system will automatically create the storage location under the catalog and schema. - - * - gateway_storage_schema - - String - - Required, Immutable. The name of the schema for the gateway pipelines's storage location. - - -### resources.pipelines.ingestion_definition -The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - connection_name - - String - - Immutable. The Unity Catalog connection that this ingestion pipeline uses to communicate with the source. This is used with connectors for applications like Salesforce, Workday, and so on. - - * - ingestion_gateway_id - - String - - Immutable. Identifier for the gateway that is used by this ingestion pipeline to communicate with the source database. This is used with connectors to databases like SQL Server. - - * - objects - - Sequence - - Required. Settings specifying tables to replicate and the destination for the replicated tables. - - * - table_configuration - - Map - - Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. - - -### resources.pipelines.ingestion_definition.table_configuration -Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - primary_keys - - Sequence - - The primary key of the table used to apply changes. - - * - salesforce_include_formula_fields - - Boolean - - If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector - - * - scd_type - - String - - The SCD type to use to ingest the table. - - * - sequence_by - - Sequence - - The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. - - -### resources.pipelines.restart_window -Restart window of this pipeline. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - days_of_week - - String - - Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). If not specified all days of the week will be used. - - * - start_hour - - Integer - - An integer between 0 and 23 denoting the start hour for the restart window in the 24-hour day. Continuous pipeline restart is triggered only within a five-hour window starting at this hour. - - * - time_zone_id - - String - - Time zone id of restart window. 
See https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details. If not specified, UTC will be used. - - -### resources.pipelines.trigger -Which pipeline trigger to use. Deprecated: Use `continuous` instead. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - cron - - Map - - - - * - manual - - Map - - - - -### resources.pipelines.trigger.cron - - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - quartz_cron_schedule - - String - - - - * - timezone_id - - String - - - - -### resources.quality_monitors -The quality monitor definitions for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `resources.quality_monitors` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - assets_dir - - String - - The directory to store monitoring assets (e.g. dashboard, metric tables). - - * - baseline_table_name - - String - - Name of the baseline table from which drift metrics are computed from. Columns in the monitored table should also be present in the baseline table. - - * - custom_metrics - - Sequence - - Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). - - * - data_classification_config - - Map - - The data classification config for the monitor. - - * - inference_log - - Map - - Configuration for monitoring inference logs. - - * - notifications - - Map - - The notification settings for the monitor. - - * - output_schema_name - - String - - Schema where output metric tables are created. - - * - schedule - - Map - - The schedule for automatically updating and refreshing metric tables. - - * - skip_builtin_dashboard - - Boolean - - Whether to skip creating a default dashboard summarizing data quality metrics. - - * - slicing_exprs - - Sequence - - List of column expressions to slice data with for targeted analysis. The data is grouped by each expression independently, resulting in a separate slice for each predicate and its complements. For high-cardinality columns, only the top 100 unique values by frequency will generate slices. - - * - snapshot - - Map - - Configuration for monitoring snapshot tables. - - * - table_name - - String - - - - * - time_series - - Map - - Configuration for monitoring time series tables. - - * - warehouse_id - - String - - Optional argument to specify the warehouse for dashboard creation. If not specified, the first running warehouse will be used. - - -### resources.quality_monitors.data_classification_config -The data classification config for the monitor. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - enabled - - Boolean - - Whether data classification is enabled. - - -### resources.quality_monitors.inference_log -Configuration for monitoring inference logs. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - granularities - - Sequence - - Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. 
- - * - label_col - - String - - Optional column that contains the ground truth for the prediction. - - * - model_id_col - - String - - Column that contains the id of the model generating the predictions. Metrics will be computed per model id by default, and also across all model ids. - - * - prediction_col - - String - - Column that contains the output/prediction from the model. - - * - prediction_proba_col - - String - - Optional column that contains the prediction probabilities for each class in a classification problem type. The values in this column should be a map, mapping each class label to the prediction probability for a given sample. The map should be of PySpark MapType(). - - * - problem_type - - String - - Problem type the model aims to solve. Determines the type of model-quality metrics that will be computed. - - * - timestamp_col - - String - - Column that contains the timestamps of requests. The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). - - -### resources.quality_monitors.notifications -The notification settings for the monitor. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - on_failure - - Map - - Who to send notifications to on monitor failure. - - * - on_new_classification_tag_detected - - Map - - Who to send notifications to when new data classification tags are detected. - - -### resources.quality_monitors.notifications.on_failure -Who to send notifications to on monitor failure. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - email_addresses - - Sequence - - The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. - - -### resources.quality_monitors.notifications.on_new_classification_tag_detected -Who to send notifications to when new data classification tags are detected. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - email_addresses - - Sequence - - The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. - - -### resources.quality_monitors.schedule -The schedule for automatically updating and refreshing metric tables. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - pause_status - - String - - Read only field that indicates whether a schedule is paused or not. - - * - quartz_cron_expression - - String - - The expression that determines when to run the monitor. See [examples](https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html). - - * - timezone_id - - String - - The timezone id (e.g., ``"PST"``) in which to evaluate the quartz expression. - - -### resources.quality_monitors.time_series -Configuration for monitoring time series tables. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - granularities - - Sequence - - Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. 
- - * - timestamp_col - - String - - Column that contains the timestamps of requests. The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). - - -### resources.registered_models -The registered model definitions for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `resources.registered_models` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - catalog_name - - String - - The name of the catalog where the schema and the registered model reside - - * - comment - - String - - The comment attached to the registered model - - * - grants - - Sequence - - - - * - name - - String - - The name of the registered model - - * - schema_name - - String - - The name of the schema where the registered model resides - - * - storage_location - - String - - The storage location on the cloud under which model version data files are stored - - -### resources.schemas -The schema definitions for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `resources.schemas` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - catalog_name - - String - - Name of parent catalog. - - * - comment - - String - - User-provided free-form text description. - - * - grants - - Sequence - - - - * - name - - String - - Name of schema, relative to parent catalog. - - * - properties - - Map - - - - * - storage_root - - String - - Storage root URL for managed tables within schema. - - -### resources.volumes - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `resources.volumes` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - catalog_name - - String - - The name of the catalog where the schema and the volume are - - * - comment - - String - - The comment attached to the volume - - * - grants - - Sequence - - - - * - name - - String - - The name of the volume - - * - schema_name - - String - - The name of the schema where the volume is - - * - storage_location - - String - - The storage location on the cloud - - * - volume_type - - String - - - - -## run_as -The identity to use to run the bundle. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - service_principal_name - - String - - Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. - - * - user_name - - String - - The email of an active workspace user. Non-admin users can only set this field to their own email. - - -## sync -The files and file paths to include or exclude in the bundle. See [_](/dev-tools/bundles/) - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - exclude - - Sequence - - A list of files or folders to exclude from the bundle. - - * - include - - Sequence - - A list of files or folders to include in the bundle. 
- - * - paths - - Sequence - - The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed. - - -## targets -Defines deployment targets for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `targets` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - artifacts - - Map - - The artifacts to include in the target deployment. See [_](#artifact) - - * - bundle - - Map - - The name of the bundle when deploying to this target. - - * - cluster_id - - String - - The ID of the cluster to use for this target. - - * - compute_id - - String - - Deprecated. The ID of the compute to use for this target. - - * - default - - Boolean - - Whether this target is the default target. - - * - git - - Map - - The Git version control settings for the target. See [_](#git). - - * - mode - - String - - The deployment mode for the target. Valid values are `development` or `production`. See [_](/dev-tools/bundles/deployment-modes.md). - - * - permissions - - Sequence - - The permissions for deploying and running the bundle in the target. See [_](#permission). - - * - presets - - Map - - The deployment presets for the target. See [_](#preset). - - * - resources - - Map - - The resource definitions for the target. See [_](#resources). - - * - run_as - - Map - - The identity to use to run the bundle. See [_](#job_run_as) and [_](/dev-tools/bundles/run_as.md). - - * - sync - - Map - - The local paths to sync to the target workspace when a bundle is run or deployed. See [_](#sync). - - * - variables - - Map - - The custom variable definitions for the target. See [_](/dev-tools/bundles/settings.md#variables) and [_](/dev-tools/bundles/variables.md). - - * - workspace - - Map - - The Databricks workspace for the target. [_](#workspace) - - -### targets.artifacts -The artifacts to include in the target deployment. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `targets.artifacts` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - build - - String - - An optional set of non-default build commands that you want to run locally before deployment. For Python wheel builds, the Databricks CLI assumes that it can find a local install of the Python wheel package to run builds, and it runs the command python setup.py bdist_wheel by default during each bundle deployment. To specify multiple build commands, separate each command with double-ampersand (&&) characters. - - * - executable - - String - - The executable type. - - * - files - - Sequence - - The source files for the artifact, defined as an [_](#artifact_file). - - * - path - - String - - The location where the built artifact will be saved. - - * - type - - String - - The type of the artifact. Valid values are `wheel` or `jar` - - -### targets.bundle -The name of the bundle when deploying to this target. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - cluster_id - - String - - The ID of a cluster to use to run the bundle. See [_](/dev-tools/bundles/settings.md#cluster_id). - - * - compute_id - - String - - - - * - databricks_cli_version - - String - - The Databricks CLI version to use for the bundle. See [_](/dev-tools/bundles/settings.md#databricks_cli_version). 
- - * - deployment - - Map - - The definition of the bundle deployment. For supported attributes, see [_](#deployment) and [_](/dev-tools/bundles/deployment-modes.md). - - * - git - - Map - - The Git version control details that are associated with your bundle. For supported attributes, see [_](#git) and [_](/dev-tools/bundles/settings.md#git). - - * - name - - String - - The name of the bundle. - - * - uuid - - String - - - - -### targets.bundle.deployment -The definition of the bundle deployment - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - fail_on_active_runs - - Boolean - - Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. - - * - lock - - Map - - The deployment lock attributes. See [_](#lock). - - -### targets.bundle.deployment.lock -The deployment lock attributes. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - enabled - - Boolean - - Whether this lock is enabled. - - * - force - - Boolean - - Whether to force this lock if it is enabled. - - -### targets.bundle.git -The Git version control details that are associated with your bundle. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - branch - - String - - The Git branch name. See [_](/dev-tools/bundles/settings.md#git). - - * - origin_url - - String - - The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git). - - -### targets.git -The Git version control settings for the target. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - branch - - String - - The Git branch name. See [_](/dev-tools/bundles/settings.md#git). - - * - origin_url - - String - - The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git). - - -### targets.presets -The deployment presets for the target. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - jobs_max_concurrent_runs - - Integer - - The maximum concurrent runs for a job. - - * - name_prefix - - String - - The prefix for job runs of the bundle. - - * - pipelines_development - - Boolean - - Whether pipeline deployments should be locked in development mode. - - * - source_linked_deployment - - Boolean - - Whether to link the deployment to the bundle source. - - * - tags - - Map - - The tags for the bundle deployment. - - * - trigger_pause_status - - String - - A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED. - - -### targets.resources -The resource definitions for the target. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - clusters - - Map - - The cluster definitions for the bundle. See [_](/dev-tools/bundles/resources.md#cluster) - - * - dashboards - - Map - - The dashboard definitions for the bundle. See [_](/dev-tools/bundles/resources.md#dashboard) - - * - experiments - - Map - - The experiment definitions for the bundle. See [_](/dev-tools/bundles/resources.md#experiment) - - * - jobs - - Map - - The job definitions for the bundle. See [_](/dev-tools/bundles/resources.md#job) - - * - model_serving_endpoints - - Map - - The model serving endpoint definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model_serving_endpoint) - - * - models - - Map - - The model definitions for the bundle. 
See [_](/dev-tools/bundles/resources.md#model) - - * - pipelines - - Map - - The pipeline definitions for the bundle. See [_](/dev-tools/bundles/resources.md#pipeline) - - * - quality_monitors - - Map - - The quality monitor definitions for the bundle. See [_](/dev-tools/bundles/resources.md#quality_monitor) - - * - registered_models - - Map - - The registered model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#registered_model) - - * - schemas - - Map - - The schema definitions for the bundle. See [_](/dev-tools/bundles/resources.md#schema) - - * - volumes - - Map - - - - -### targets.resources.clusters -The cluster definitions for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `targets.resources.clusters` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - apply_policy_default_values - - Boolean - - When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. - - * - autoscale - - Map - - Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. - - * - autotermination_minutes - - Integer - - Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. - - * - aws_attributes - - Map - - Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. - - * - azure_attributes - - Map - - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. - - * - cluster_log_conf - - Map - - The configuration for delivering spark logs to a long-term storage destination. Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. - - * - cluster_name - - String - - Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. - - * - custom_tags - - Map - - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags - - * - data_security_mode - - String - - - - * - docker_image - - Map - - - - * - driver_instance_pool_id - - String - - The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. - - * - driver_node_type_id - - String - - The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. 
- - * - enable_elastic_disk - - Boolean - - Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to the User Guide for more details. - - * - enable_local_disk_encryption - - Boolean - - Whether to enable LUKS on cluster VMs' local disks - - * - gcp_attributes - - Map - - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. - - * - init_scripts - - Sequence - - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. - - * - instance_pool_id - - String - - The optional ID of the instance pool to which the cluster belongs. - - * - node_type_id - - String - - This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. - - * - num_workers - - Integer - - Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. - - * - permissions - - Sequence - - - - * - policy_id - - String - - The ID of the cluster policy used to create the cluster if applicable. - - * - runtime_engine - - String - - - - * - single_user_name - - String - - Single user name if data_security_mode is `SINGLE_USER` - - * - spark_conf - - Map - - An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. - - * - spark_env_vars - - Map - - An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` - - * - spark_version - - String - - The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. - - * - ssh_public_keys - - Sequence - - SSH public key contents that will be added to each Spark node in this cluster. 
The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. - - * - workload_type - - Map - - - - -### targets.resources.clusters.autoscale -Parameters needed in order to automatically scale clusters up and down based on load. -Note: autoscaling works best with DB runtime versions 3.0 or later. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - max_workers - - Integer - - The maximum number of workers to which the cluster can scale up when overloaded. Note that `max_workers` must be strictly greater than `min_workers`. - - * - min_workers - - Integer - - The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. - - -### targets.resources.clusters.aws_attributes -Attributes related to clusters running on Amazon Web Services. -If not specified at cluster creation, a set of default values will be used. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - availability - - String - - - - * - ebs_volume_count - - Integer - - The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogenously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. - - * - ebs_volume_iops - - Integer - - If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. - - * - ebs_volume_size - - Integer - - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. - - * - ebs_volume_throughput - - Integer - - If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. - - * - ebs_volume_type - - String - - - - * - first_on_demand - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. - - * - instance_profile_arn - - String - - Nodes for this cluster will only be placed on AWS instances with this instance profile. 
If omitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans. If this field is omitted, we will pull in the default from the conf if it exists. - - * - spot_bid_price_percent - - Integer - - The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. The default value and documentation here should be kept consistent with CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. - - * - zone_id - - String - - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", Databricks will try to place the cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. - - -### targets.resources.clusters.azure_attributes -Attributes related to clusters running on Microsoft Azure. -If not specified at cluster creation, a set of default values will be used. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - availability - - String - - - - * - first_on_demand - - Integer - - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. - - * - log_analytics_info - - Map - - Defines values necessary to configure and run Azure Log Analytics agent - - * - spot_bid_max_price - - Any - - The max bid price to be used for Azure spot instances. The max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value must be greater than 0 or exactly -1. - - -### targets.resources.clusters.azure_attributes.log_analytics_info -Defines values necessary to configure and run Azure Log Analytics agent - -#### Attributes - - -..
list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - log_analytics_primary_key - - String - - - - * - log_analytics_workspace_id - - String - - - - -### targets.resources.clusters.cluster_log_conf -The configuration for delivering spark logs to a long-term storage destination. -Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified -for one cluster. If the conf is given, the logs will be delivered to the destination every -`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while -the destination of executor logs is `$destination/$clusterId/executor`. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - dbfs - - Map - - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` - - * - s3 - - Map - - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. - - -### targets.resources.clusters.cluster_log_conf.dbfs -destination needs to be provided. e.g. -`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - destination - - String - - dbfs destination, e.g. `dbfs:/my/path` - - -### targets.resources.clusters.cluster_log_conf.s3 -destination and either the region or endpoint need to be provided. e.g. -`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` -Cluster iam role is used to access s3, please make sure the cluster iam role in -`instance_profile_arn` has permission to write data to the s3 destination. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - canned_acl - - String - - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned ACLs can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full control. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. - - * - destination - - String - - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. - - * - enable_encryption - - Boolean - - (Optional) Flag to enable server side encryption, `false` by default. - - * - encryption_type - - String - - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. - - * - kms_key - - String - - (Optional) KMS key which will be used if encryption is enabled and encryption type is set to `sse-kms`.
- - * - region - - String - - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. - - -### targets.resources.clusters.docker_image - - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - basic_auth - - Map - - - - * - url - - String - - URL of the Docker image. - - -### targets.resources.clusters.docker_image.basic_auth - - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - password - - String - - Password of the user. - - * - username - - String - - Name of the user. - - -### targets.resources.clusters.gcp_attributes -Attributes related to clusters running on Google Cloud Platform. -If not specified at cluster creation, a set of default values will be used. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - availability - - String - - - - * - boot_disk_size - - Integer - - Boot disk size in GB. - - * - google_service_account - - String - - If provided, the cluster will impersonate the Google service account when accessing gcloud services (like GCS). The Google service account must have previously been added to the Databricks environment by an account administrator. - - * - local_ssd_count - - Integer - - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. - - * - use_preemptible_executors - - Boolean - - This field determines whether the Spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default). Note: Soon to be deprecated, use the availability field instead. - - * - zone_id - - String - - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default] - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick one of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. - - -### targets.resources.clusters.workload_type - - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - clients - - Map - - Defines what type of clients can use the cluster, e.g. notebooks or jobs. - - -### targets.resources.clusters.workload_type.clients -Defines what type of clients can use the cluster, e.g. notebooks or jobs. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - jobs - - Boolean - - With jobs set, the cluster can be used for jobs. - - * - notebooks - - Boolean - - With notebooks set, this cluster can be used for notebooks. - - -### targets.resources.dashboards -The dashboard definitions for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `targets.resources.dashboards` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - create_time - - String - - The timestamp of when the dashboard was created. - - * - dashboard_id - - String - - UUID identifying the dashboard.
- - * - display_name - - String - - The display name of the dashboard. - - * - embed_credentials - - Boolean - - - - * - etag - - String - - The etag for the dashboard. Can be optionally provided on updates to ensure that the dashboard has not been modified since the last read. This field is excluded in List Dashboards responses. - - * - file_path - - String - - - - * - lifecycle_state - - String - - The state of the dashboard resource. Used for tracking trashed status. - - * - parent_path - - String - - The workspace path of the folder containing the dashboard. Includes leading slash and no trailing slash. This field is excluded in List Dashboards responses. - - * - path - - String - - The workspace path of the dashboard asset, including the file name. Exported dashboards always have the file extension `.lvdash.json`. This field is excluded in List Dashboards responses. - - * - permissions - - Sequence - - - - * - serialized_dashboard - - Any - - The contents of the dashboard in serialized string form. This field is excluded in List Dashboards responses. Use the [get dashboard API](https://docs.databricks.com/api/workspace/lakeview/get) to retrieve an example response, which includes the `serialized_dashboard` field. This field provides the structure of the JSON string that represents the dashboard's layout and components. - - * - update_time - - String - - The timestamp of when the dashboard was last updated by the user. This field is excluded in List Dashboards responses. - - * - warehouse_id - - String - - The warehouse ID used to run the dashboard. - - -### targets.resources.experiments -The experiment definitions for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `targets.resources.experiments` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - artifact_location - - String - - Location where artifacts for the experiment are stored. - - * - creation_time - - Integer - - Creation time - - * - experiment_id - - String - - Unique identifier for the experiment. - - * - last_update_time - - Integer - - Last update time - - * - lifecycle_stage - - String - - Current life cycle stage of the experiment: "active" or "deleted". Deleted experiments are not returned by APIs. - - * - name - - String - - Human readable name that identifies the experiment. - - * - permissions - - Sequence - - - - * - tags - - Sequence - - Tags: Additional metadata key-value pairs. - - -### targets.resources.jobs -The job definitions for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `targets.resources.jobs` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - budget_policy_id - - String - - The id of the user specified budget policy to use for this job. If not specified, a default budget policy may be applied when creating or modifying the job. See `effective_budget_policy_id` for the budget policy used by this workload. - - * - continuous - - Map - - An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. - - * - description - - String - - An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding. 
- - * - email_notifications - - Map - - An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. - - * - environments - - Sequence - - A list of task execution environment specifications that can be referenced by serverless tasks of this job. An environment is required to be present for serverless tasks. For serverless notebook tasks, the environment is accessible in the notebook environment panel. For other serverless tasks, the task environment is required to be specified using environment_key in the task settings. - - * - git_source - - Map - - An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. - - * - health - - Map - - - - * - job_clusters - - Sequence - - A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. - - * - max_concurrent_runs - - Integer - - An optional maximum allowed number of concurrent runs of the job. Set this value if you want to be able to execute multiple runs of the same job concurrently. This is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters. This setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs. However, from then on, new runs are skipped unless there are fewer than 3 active runs. This value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped. - - * - name - - String - - An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding. - - * - notification_settings - - Map - - Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. - - * - parameters - - Sequence - - Job-level parameter definitions - - * - permissions - - Sequence - - - - * - queue - - Map - - The queue settings of the job. - - * - run_as - - Map - - - - * - schedule - - Map - - An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. - - * - tags - - Map - - A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. - - * - tasks - - Sequence - - A list of task specifications to be executed by this job. - - * - timeout_seconds - - Integer - - An optional timeout applied to each run of this job. A value of `0` means no timeout. - - * - trigger - - Map - - A configuration to trigger a run when certain conditions are met. 
The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. - - * - webhook_notifications - - Map - - A collection of system notification IDs to notify when runs of this job begin or complete. - - -### targets.resources.jobs.continuous -An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - pause_status - - String - - Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED. - - -### targets.resources.jobs.email_notifications -An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - no_alert_for_skipped_runs - - Boolean - - If true, do not send email to recipients specified in `on_failure` if the run is skipped. This field is `deprecated`. Please use the `notification_settings.no_alert_for_skipped_runs` field. - - * - on_duration_warning_threshold_exceeded - - Sequence - - A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. - - * - on_failure - - Sequence - - A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. - - * - on_start - - Sequence - - A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. - - * - on_streaming_backlog_exceeded - - Sequence - - A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. - - * - on_success - - Sequence - - A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. - - -### targets.resources.jobs.git_source -An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. - -If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. - -Note: dbt and SQL File tasks support only version-controlled sources. 
If dbt or SQL File tasks are used, `git_source` must be defined on the job. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - git_branch - - String - - Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit. - - * - git_commit - - String - - Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag. - - * - git_provider - - String - - Unique identifier of the service used to host the Git repository. The value is case insensitive. - - * - git_tag - - String - - Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit. - - * - git_url - - String - - URL of the repository to be cloned by this job. - - -### targets.resources.jobs.health - - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - rules - - Sequence - - - - -### targets.resources.jobs.notification_settings -Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - no_alert_for_canceled_runs - - Boolean - - If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. - - * - no_alert_for_skipped_runs - - Boolean - - If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. - - -### targets.resources.jobs.queue -The queue settings of the job. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - enabled - - Boolean - - If true, enable queueing for the job. This is a required field. - - -### targets.resources.jobs.run_as - - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - service_principal_name - - String - - Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. - - * - user_name - - String - - The email of an active workspace user. Non-admin users can only set this field to their own email. - - -### targets.resources.jobs.schedule -An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - pause_status - - String - - Indicate whether this schedule is paused or not. - - * - quartz_cron_expression - - String - - A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required. - - * - timezone_id - - String - - A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required. - - -### targets.resources.jobs.trigger -A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. - -#### Attributes - - -.. 
list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - file_arrival - - Map - - File arrival trigger settings. - - * - pause_status - - String - - Whether this trigger is paused or not. - - * - periodic - - Map - - Periodic trigger settings. - - * - table - - Map - - Old table trigger settings name. Deprecated in favor of `table_update`. - - * - table_update - - Map - - - - -### targets.resources.jobs.trigger.file_arrival -File arrival trigger settings. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - min_time_between_triggers_seconds - - Integer - - If set, the trigger starts a run only after the specified amount of time passed since the last time the trigger fired. The minimum allowed value is 60 seconds - - * - url - - String - - URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location. - - * - wait_after_last_change_seconds - - Integer - - If set, the trigger starts a run only after no file activity has occurred for the specified amount of time. This makes it possible to wait for a batch of incoming files to arrive before triggering a run. The minimum allowed value is 60 seconds. - - -### targets.resources.jobs.trigger.periodic -Periodic trigger settings. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - interval - - Integer - - The interval at which the trigger should run. - - * - unit - - String - - The unit of time for the interval. - - -### targets.resources.jobs.trigger.table -Old table trigger settings name. Deprecated in favor of `table_update`. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - condition - - String - - The table(s) condition based on which to trigger a job run. - - * - min_time_between_triggers_seconds - - Integer - - If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. - - * - table_names - - Sequence - - A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. - - * - wait_after_last_change_seconds - - Integer - - If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. The minimum allowed value is 60 seconds. - - -### targets.resources.jobs.trigger.table_update - - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - condition - - String - - The table(s) condition based on which to trigger a job run. - - * - min_time_between_triggers_seconds - - Integer - - If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. - - * - table_names - - Sequence - - A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. - - * - wait_after_last_change_seconds - - Integer - - If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. The minimum allowed value is 60 seconds. - - -### targets.resources.jobs.webhook_notifications -A collection of system notification IDs to notify when runs of this job begin or complete. 
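For orientation, the notification settings documented in this and the preceding sections map onto plain `databricks.yml` entries. A minimal sketch (the job name, e-mail address, and destination ID below are placeholders, not values from this change):

```yaml
resources:
  jobs:
    nightly_etl: # hypothetical job name
      email_notifications:
        on_failure:
          - ops-team@example.com # placeholder address
      webhook_notifications:
        on_failure:
          - id: "<notification-destination-id>" # placeholder destination ID
```

Each `on_*` key accepts a list, mirroring the Sequence types in these tables.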
- -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - on_duration_warning_threshold_exceeded - - Sequence - - An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. - - * - on_failure - - Sequence - - An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. - - * - on_start - - Sequence - - An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. - - * - on_streaming_backlog_exceeded - - Sequence - - An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. - - * - on_success - - Sequence - - An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. - - -### targets.resources.model_serving_endpoints -The model serving endpoint definitions for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `targets.resources.model_serving_endpoints` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - ai_gateway - - Map - - The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are supported as of now. - - * - config - - Map - - The core config of the serving endpoint. - - * - name - - String - - The name of the serving endpoint. This field is required and must be unique across a Databricks workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. - - * - permissions - - Sequence - - - - * - rate_limits - - Sequence - - Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI Gateway to manage rate limits. - - * - route_optimized - - Boolean - - Enable route optimization for the serving endpoint. - - * - tags - - Sequence - - Tags to be attached to the serving endpoint and automatically propagated to billing logs. - - -### targets.resources.model_serving_endpoints.ai_gateway -The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are supported as of now. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - guardrails - - Map - - Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. - - * - inference_table_config - - Map - - Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. 
- - * - rate_limits - - Sequence - - Configuration for rate limits which can be set to limit endpoint traffic. - - * - usage_tracking_config - - Map - - Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. - - -### targets.resources.model_serving_endpoints.ai_gateway.guardrails -Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - input - - Map - - Configuration for input guardrail filters. - - * - output - - Map - - Configuration for output guardrail filters. - - -### targets.resources.model_serving_endpoints.ai_gateway.guardrails.input -Configuration for input guardrail filters. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - invalid_keywords - - Sequence - - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. - - * - pii - - Map - - Configuration for guardrail PII filter. - - * - safety - - Boolean - - Indicates whether the safety filter is enabled. - - * - valid_topics - - Sequence - - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. - - -### targets.resources.model_serving_endpoints.ai_gateway.guardrails.input.pii -Configuration for guardrail PII filter. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - behavior - - String - - Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. - - -### targets.resources.model_serving_endpoints.ai_gateway.guardrails.output -Configuration for output guardrail filters. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - invalid_keywords - - Sequence - - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. - - * - pii - - Map - - Configuration for guardrail PII filter. - - * - safety - - Boolean - - Indicates whether the safety filter is enabled. - - * - valid_topics - - Sequence - - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. - - -### targets.resources.model_serving_endpoints.ai_gateway.guardrails.output.pii -Configuration for guardrail PII filter. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - behavior - - String - - Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. - - -### targets.resources.model_serving_endpoints.ai_gateway.inference_table_config -Configuration for payload logging using inference tables. 
Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - catalog_name - - String - - The name of the catalog in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the catalog name. - - * - enabled - - Boolean - - Indicates whether the inference table is enabled. - - * - schema_name - - String - - The name of the schema in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the schema name. - - * - table_name_prefix - - String - - The prefix of the table in Unity Catalog. NOTE: On update, you have to disable inference table first in order to change the prefix name. - - -### targets.resources.model_serving_endpoints.ai_gateway.usage_tracking_config -Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - enabled - - Boolean - - Whether to enable usage tracking. - - -### targets.resources.model_serving_endpoints.config -The core config of the serving endpoint. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - auto_capture_config - - Map - - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. - - * - served_entities - - Sequence - - A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served entities. - - * - served_models - - Sequence - - (Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A serving endpoint can have up to 15 served models. - - * - traffic_config - - Map - - The traffic config defining how invocations to the serving endpoint should be routed. - - -### targets.resources.model_serving_endpoints.config.auto_capture_config -Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - catalog_name - - String - - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled. - - * - enabled - - Boolean - - Indicates whether the inference table is enabled. - - * - schema_name - - String - - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if the inference table is already enabled. - - * - table_name_prefix - - String - - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled. - - -### targets.resources.model_serving_endpoints.config.traffic_config -The traffic config defining how invocations to the serving endpoint should be routed. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - routes - - Sequence - - The list of routes that define traffic to each served entity. - - -### targets.resources.models -The model definitions for the bundle. - - -.. 
list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `targets.resources.models` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - creation_timestamp - - Integer - - Timestamp recorded when this `registered_model` was created. - - * - description - - String - - Description of this `registered_model`. - - * - last_updated_timestamp - - Integer - - Timestamp recorded when metadata for this `registered_model` was last updated. - - * - latest_versions - - Sequence - - Collection of latest model versions for each stage. Only contains models with current `READY` status. - - * - name - - String - - Unique name for the model. - - * - permissions - - Sequence - - - - * - tags - - Sequence - - Tags: Additional metadata key-value pairs for this `registered_model`. - - * - user_id - - String - - User that created this `registered_model` - - -### targets.resources.pipelines -The pipeline definitions for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `targets.resources.pipelines` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - budget_policy_id - - String - - Budget policy of this pipeline. - - * - catalog - - String - - A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. - - * - channel - - String - - DLT Release Channel that specifies which version to use. - - * - clusters - - Sequence - - Cluster settings for this pipeline deployment. - - * - configuration - - Map - - String-String configuration for this pipeline execution. - - * - continuous - - Boolean - - Whether the pipeline is continuous or triggered. This replaces `trigger`. - - * - deployment - - Map - - Deployment type of this pipeline. - - * - development - - Boolean - - Whether the pipeline is in Development mode. Defaults to false. - - * - edition - - String - - Pipeline product edition. - - * - filters - - Map - - Filters on which Pipeline packages to include in the deployed graph. - - * - gateway_definition - - Map - - The definition of a gateway pipeline to support change data capture. - - * - id - - String - - Unique identifier for this pipeline. - - * - ingestion_definition - - Map - - The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings. - - * - libraries - - Sequence - - Libraries or code needed by this deployment. - - * - name - - String - - Friendly identifier for this pipeline. - - * - notifications - - Sequence - - List of notification settings for this pipeline. - - * - permissions - - Sequence - - - - * - photon - - Boolean - - Whether Photon is enabled for this pipeline. - - * - restart_window - - Map - - Restart window of this pipeline. - - * - schema - - String - - The default schema (database) where tables are read from or published to. The presence of this field implies that the pipeline is in direct publishing mode. - - * - serverless - - Boolean - - Whether serverless compute is enabled for this pipeline. - - * - storage - - String - - DBFS root directory for storing checkpoints and tables. 
- - * - target - - String - - Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. - - * - trigger - - Map - - Which pipeline trigger to use. Deprecated: Use `continuous` instead. - - -### targets.resources.pipelines.deployment -Deployment type of this pipeline. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - kind - - String - - The deployment method that manages the pipeline. - - * - metadata_file_path - - String - - The path to the file containing metadata about the deployment. - - -### targets.resources.pipelines.filters -Filters on which Pipeline packages to include in the deployed graph. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - exclude - - Sequence - - Paths to exclude. - - * - include - - Sequence - - Paths to include. - - -### targets.resources.pipelines.gateway_definition -The definition of a gateway pipeline to support change data capture. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - connection_id - - String - - [Deprecated, use connection_name instead] Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. - - * - connection_name - - String - - Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. - - * - gateway_storage_catalog - - String - - Required, Immutable. The name of the catalog for the gateway pipeline's storage location. - - * - gateway_storage_name - - String - - Optional. The Unity Catalog-compatible name for the gateway storage location. This is the destination to use for the data that is extracted by the gateway. Delta Live Tables system will automatically create the storage location under the catalog and schema. - - * - gateway_storage_schema - - String - - Required, Immutable. The name of the schema for the gateway pipelines's storage location. - - -### targets.resources.pipelines.ingestion_definition -The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - connection_name - - String - - Immutable. The Unity Catalog connection that this ingestion pipeline uses to communicate with the source. This is used with connectors for applications like Salesforce, Workday, and so on. - - * - ingestion_gateway_id - - String - - Immutable. Identifier for the gateway that is used by this ingestion pipeline to communicate with the source database. This is used with connectors to databases like SQL Server. - - * - objects - - Sequence - - Required. Settings specifying tables to replicate and the destination for the replicated tables. - - * - table_configuration - - Map - - Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. - - -### targets.resources.pipelines.ingestion_definition.table_configuration -Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - primary_keys - - Sequence - - The primary key of the table used to apply changes. 
- - * - salesforce_include_formula_fields - - Boolean - - If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector - - * - scd_type - - String - - The SCD type to use to ingest the table. - - * - sequence_by - - Sequence - - The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. - - -### targets.resources.pipelines.restart_window -Restart window of this pipeline. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - days_of_week - - String - - Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). If not specified all days of the week will be used. - - * - start_hour - - Integer - - An integer between 0 and 23 denoting the start hour for the restart window in the 24-hour day. Continuous pipeline restart is triggered only within a five-hour window starting at this hour. - - * - time_zone_id - - String - - Time zone id of restart window. See https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details. If not specified, UTC will be used. - - -### targets.resources.pipelines.trigger -Which pipeline trigger to use. Deprecated: Use `continuous` instead. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - cron - - Map - - - - * - manual - - Map - - - - -### targets.resources.pipelines.trigger.cron - - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - quartz_cron_schedule - - String - - - - * - timezone_id - - String - - - - -### targets.resources.quality_monitors -The quality monitor definitions for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `targets.resources.quality_monitors` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - assets_dir - - String - - The directory to store monitoring assets (e.g. dashboard, metric tables). - - * - baseline_table_name - - String - - Name of the baseline table from which drift metrics are computed from. Columns in the monitored table should also be present in the baseline table. - - * - custom_metrics - - Sequence - - Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). - - * - data_classification_config - - Map - - The data classification config for the monitor. - - * - inference_log - - Map - - Configuration for monitoring inference logs. - - * - notifications - - Map - - The notification settings for the monitor. - - * - output_schema_name - - String - - Schema where output metric tables are created. - - * - schedule - - Map - - The schedule for automatically updating and refreshing metric tables. - - * - skip_builtin_dashboard - - Boolean - - Whether to skip creating a default dashboard summarizing data quality metrics. - - * - slicing_exprs - - Sequence - - List of column expressions to slice data with for targeted analysis. The data is grouped by each expression independently, resulting in a separate slice for each predicate and its complements. 
For high-cardinality columns, only the top 100 unique values by frequency will generate slices. - - * - snapshot - - Map - - Configuration for monitoring snapshot tables. - - * - table_name - - String - - - - * - time_series - - Map - - Configuration for monitoring time series tables. - - * - warehouse_id - - String - - Optional argument to specify the warehouse for dashboard creation. If not specified, the first running warehouse will be used. - - -### targets.resources.quality_monitors.data_classification_config -The data classification config for the monitor. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - enabled - - Boolean - - Whether data classification is enabled. - - -### targets.resources.quality_monitors.inference_log -Configuration for monitoring inference logs. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - granularities - - Sequence - - Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. - - * - label_col - - String - - Optional column that contains the ground truth for the prediction. - - * - model_id_col - - String - - Column that contains the id of the model generating the predictions. Metrics will be computed per model id by default, and also across all model ids. - - * - prediction_col - - String - - Column that contains the output/prediction from the model. - - * - prediction_proba_col - - String - - Optional column that contains the prediction probabilities for each class in a classification problem type. The values in this column should be a map, mapping each class label to the prediction probability for a given sample. The map should be of PySpark MapType(). - - * - problem_type - - String - - Problem type the model aims to solve. Determines the type of model-quality metrics that will be computed. - - * - timestamp_col - - String - - Column that contains the timestamps of requests. The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). - - -### targets.resources.quality_monitors.notifications -The notification settings for the monitor. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - on_failure - - Map - - Who to send notifications to on monitor failure. - - * - on_new_classification_tag_detected - - Map - - Who to send notifications to when new data classification tags are detected. - - -### targets.resources.quality_monitors.notifications.on_failure -Who to send notifications to on monitor failure. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - email_addresses - - Sequence - - The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. - - -### targets.resources.quality_monitors.notifications.on_new_classification_tag_detected -Who to send notifications to when new data classification tags are detected. - -#### Attributes - - -.. 
list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - email_addresses - - Sequence - - The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. - - -### targets.resources.quality_monitors.schedule -The schedule for automatically updating and refreshing metric tables. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - pause_status - - String - - Read only field that indicates whether a schedule is paused or not. - - * - quartz_cron_expression - - String - - The expression that determines when to run the monitor. See [examples](https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html). - - * - timezone_id - - String - - The timezone id (e.g., ``"PST"``) in which to evaluate the quartz expression. - - -### targets.resources.quality_monitors.time_series -Configuration for monitoring time series tables. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - granularities - - Sequence - - Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. - - * - timestamp_col - - String - - Column that contains the timestamps of requests. The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). - - -### targets.resources.registered_models -The registered model definitions for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - + * - dashboards - Map - - Item of the `targets.resources.registered_models` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - catalog_name - - String - - The name of the catalog where the schema and the registered model reside - - * - comment - - String - - The comment attached to the registered model - - * - grants - - Sequence - - - - * - name - - String - - The name of the registered model - - * - schema_name - - String - - The name of the schema where the registered model resides - - * - storage_location - - String - - The storage location on the cloud under which model version data files are stored - - -### targets.resources.schemas -The schema definitions for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description + - The dashboard definitions for the bundle. See [_](/dev-tools/bundles/resources.md#dashboard) - * - + * - experiments - Map - - Item of the `targets.resources.schemas` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - catalog_name - - String - - Name of parent catalog. - - * - comment - - String - - User-provided free-form text description. - - * - grants - - Sequence - - - - * - name - - String - - Name of schema, relative to parent catalog. + - The experiment definitions for the bundle. See [_](/dev-tools/bundles/resources.md#experiment) - * - properties + * - jobs - Map - - - - * - storage_root - - String - - Storage root URL for managed tables within schema. - - -### targets.resources.volumes - - - -.. 
list-table:: - :header-rows: 1 - - * - Key - - Type - - Description + - The job definitions for the bundle. See [_](/dev-tools/bundles/resources.md#job) - * - + * - model_serving_endpoints - Map - - Item of the `targets.resources.volumes` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - catalog_name - - String - - The name of the catalog where the schema and the volume are + - The model serving endpoint definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model_serving_endpoint) - * - comment - - String - - The comment attached to the volume + * - models + - Map + - The model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model) - * - grants - - Sequence - - + * - pipelines + - Map + - The pipeline definitions for the bundle. See [_](/dev-tools/bundles/resources.md#pipeline) - * - name - - String - - The name of the volume + * - quality_monitors + - Map + - The quality monitor definitions for the bundle. See [_](/dev-tools/bundles/resources.md#quality_monitor) - * - schema_name - - String - - The name of the schema where the volume is + * - registered_models + - Map + - The registered model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#registered_model) - * - storage_location - - String - - The storage location on the cloud + * - schemas + - Map + - The schema definitions for the bundle. See [_](/dev-tools/bundles/resources.md#schema) - * - volume_type - - String + * - volumes + - Map - -### targets.run_as -The identity to use to run the bundle. - -#### Attributes - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - service_principal_name - - String - - Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. - - * - user_name - - String - - The email of an active workspace user. Non-admin users can only set this field to their own email. - - ### targets.sync The local paths to sync to the target workspace when a bundle is run or deployed. @@ -5233,110 +721,6 @@ The local paths to sync to the target workspace when a bundle is run or deployed - The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed. -### targets.variables -The custom variable definitions for the target. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `targets.variables` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - default - - Any - - - - * - description - - String - - The description of the variable. - - * - lookup - - Map - - The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID. - - * - type - - String - - The type of the variable. - - -### targets.variables.lookup -The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID. - -#### Attributes - - -.. 
list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - alert - - String - - - - * - cluster - - String - - - - * - cluster_policy - - String - - - - * - dashboard - - String - - - - * - instance_pool - - String - - - - * - job - - String - - - - * - metastore - - String - - - - * - notification_destination - - String - - - - * - pipeline - - String - - - - * - query - - String - - - - * - service_principal - - String - - - - * - warehouse - - String - - - - ### targets.workspace The Databricks workspace for the target. diff --git a/bundle/internal/docs/main.go b/bundle/internal/docs/main.go index 5c033d84fc..328dd7fd15 100644 --- a/bundle/internal/docs/main.go +++ b/bundle/internal/docs/main.go @@ -91,21 +91,24 @@ type annotation struct { func generateDocs(workdir, outputPath string) error { annotationsPath := filepath.Join(workdir, "annotations.yml") - annotationsOpenApiPath := filepath.Join(workdir, "annotations_openapi.yml") - annotationsOpenApiOverridesPath := filepath.Join(workdir, "annotations_openapi_overrides.yml") - annotations, err := LoadAndMergeAnnotations([]string{annotationsPath, annotationsOpenApiPath, annotationsOpenApiOverridesPath}) + annotations, err := LoadAndMergeAnnotations([]string{annotationsPath}) if err != nil { log.Fatal(err) } schemas := map[string]jsonschema.Schema{} + customFields := map[string]bool{} s, err := jsonschema.FromType(reflect.TypeOf(config.Root{}), []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{ removeJobsFields, makeVolumeTypeOptional, func(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { + _, isCustomField := annotations[jsonschema.TypePath(typ)] + if isCustomField { + customFields[jsonschema.TypePath(typ)] = true + } schemas[jsonschema.TypePath(typ)] = s refPath := getPath(typ) @@ -135,7 +138,7 @@ func generateDocs(workdir, outputPath string) error { log.Fatal(err) } - nodes := getNodes(s, schemas, annotations) + nodes := getNodes(s, schemas, customFields) err = buildMarkdown(nodes, outputPath) if err != nil { log.Fatal(err) From 10e0d276c8f3e03e254c6e296d58e952be8a67f3 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Wed, 18 Dec 2024 21:12:44 +0100 Subject: [PATCH 05/26] fix: Remove extra headings --- bundle/internal/docs/docs.go | 3 ++- bundle/internal/docs/docs.md | 47 ++++++++++++++++++++---------------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/bundle/internal/docs/docs.go b/bundle/internal/docs/docs.go index 289874a377..84ee7bcf56 100644 --- a/bundle/internal/docs/docs.go +++ b/bundle/internal/docs/docs.go @@ -95,6 +95,7 @@ func buildMarkdown(nodes []rootNode, outputFile string) error { } else { m = m.H3(node.Title) } + m = m.LF() m = m.PlainText(node.Description) m = m.LF() @@ -110,7 +111,7 @@ func buildMarkdown(nodes []rootNode, outputFile string) error { m = m.LF() m = buildAttributeTable(m, node.ArrayItemAttributes) } else if len(node.Attributes) > 0 { - m = m.H4("Attributes") + // m = m.H4("Attributes") m = m.LF() m = buildAttributeTable(m, node.Attributes) } diff --git a/bundle/internal/docs/docs.md b/bundle/internal/docs/docs.md index 6cfbcc1319..e3f2124b9d 100644 --- a/bundle/internal/docs/docs.md +++ b/bundle/internal/docs/docs.md @@ -1,5 +1,6 @@ ## artifacts + Defines the attributes to build an artifact @@ -46,9 +47,9 @@ Each item has the following attributes: ## bundle + The attributes of the bundle. See [_](/dev-tools/bundles/settings.md#bundle) -#### Attributes .. list-table:: @@ -88,9 +89,9 @@ The attributes of the bundle. 
See [_](/dev-tools/bundles/settings.md#bundle) ### bundle.deployment + The definition of the bundle deployment -#### Attributes .. list-table:: @@ -110,9 +111,9 @@ The definition of the bundle deployment ### bundle.deployment.lock + The deployment lock attributes. -#### Attributes .. list-table:: @@ -132,9 +133,9 @@ The deployment lock attributes. ### bundle.git + The Git version control details that are associated with your bundle. -#### Attributes .. list-table:: @@ -154,9 +155,9 @@ The Git version control details that are associated with your bundle. ## experimental + Defines attributes for experimental features. -#### Attributes .. list-table:: @@ -184,9 +185,9 @@ Defines attributes for experimental features. ### experimental.pydabs + The PyDABs configuration. -#### Attributes .. list-table:: @@ -210,10 +211,12 @@ The PyDABs configuration. ## include + Specifies a list of path globs that contain configuration files to include within the bundle. See [_](/dev-tools/bundles/settings.md#include) ## permissions + Defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle. See [_](/dev-tools/bundles/settings.md#permissions) and [_](/dev-tools/bundles/permissions.md). Each item of `permissions` has the following attributes: @@ -244,9 +247,9 @@ Each item of `permissions` has the following attributes: ## presets + Defines bundle deployment presets. See [_](/dev-tools/bundles/deployment-modes.md#presets). -#### Attributes .. list-table:: @@ -282,9 +285,9 @@ Defines bundle deployment presets. See [_](/dev-tools/bundles/deployment-modes.m ## resources + Specifies information about the Databricks resources used by the bundle. See [_](/dev-tools/bundles/resources.md). -#### Attributes .. list-table:: @@ -340,9 +343,9 @@ Specifies information about the Databricks resources used by the bundle. See [_] ## run_as + The identity to use to run the bundle. -#### Attributes .. list-table:: @@ -362,9 +365,9 @@ The identity to use to run the bundle. ## sync + The files and file paths to include or exclude in the bundle. See [_](/dev-tools/bundles/) -#### Attributes .. list-table:: @@ -388,6 +391,7 @@ The files and file paths to include or exclude in the bundle. See [_](/dev-tools ## targets + Defines deployment targets for the bundle. @@ -470,9 +474,9 @@ Each item has the following attributes: ### targets.bundle + The name of the bundle when deploying to this target. -#### Attributes .. list-table:: @@ -512,9 +516,9 @@ The name of the bundle when deploying to this target. ### targets.bundle.deployment + The definition of the bundle deployment -#### Attributes .. list-table:: @@ -534,9 +538,9 @@ The definition of the bundle deployment ### targets.bundle.deployment.lock + The deployment lock attributes. -#### Attributes .. list-table:: @@ -556,9 +560,9 @@ The deployment lock attributes. ### targets.bundle.git + The Git version control details that are associated with your bundle. -#### Attributes .. list-table:: @@ -578,9 +582,9 @@ The Git version control details that are associated with your bundle. ### targets.git + The Git version control settings for the target. -#### Attributes .. list-table:: @@ -600,9 +604,9 @@ The Git version control settings for the target. ### targets.presets + The deployment presets for the target. -#### Attributes .. list-table:: @@ -638,9 +642,9 @@ The deployment presets for the target. ### targets.resources + The resource definitions for the target. -#### Attributes .. list-table:: @@ -696,9 +700,9 @@ The resource definitions for the target. 
### targets.sync + The local paths to sync to the target workspace when a bundle is run or deployed. -#### Attributes .. list-table:: @@ -722,9 +726,9 @@ The local paths to sync to the target workspace when a bundle is run or deployed ### targets.workspace + The Databricks workspace for the target. -#### Attributes .. list-table:: @@ -800,6 +804,7 @@ The Databricks workspace for the target. ## variables + A Map that defines the custom variables for the bundle, where each key is the name of the variable, and the value is a Map that defines the variable. @@ -842,9 +847,9 @@ Each item has the following attributes: ### variables.lookup + The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID. -#### Attributes .. list-table:: @@ -904,9 +909,9 @@ The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, m ## workspace + Defines the Databricks workspace for the bundle. -#### Attributes .. list-table:: From 64fa2bfcb77f2065938c65134d1035bf3b61a8ce Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Wed, 18 Dec 2024 21:49:42 +0100 Subject: [PATCH 06/26] fix: Small readme --- Makefile | 3 ++- bundle/internal/docs/README.md | 7 +++++++ go.mod | 4 ++-- go.sum | 2 -- 4 files changed, 11 insertions(+), 5 deletions(-) create mode 100644 bundle/internal/docs/README.md diff --git a/Makefile b/Makefile index c39e03a000..2db0ba0bd0 100644 --- a/Makefile +++ b/Makefile @@ -35,8 +35,9 @@ schema: @go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema.json docs: - @echo "✓ Generating docs ..." + @echo "✓ Generating docs using ./bundle/internal/schema/annotations.yml file..." @go run ./bundle/internal/docs ./bundle/internal/schema ./bundle/internal/docs/docs.md + @echo "✓ Writing docs to ./bundle/internal/docs/docs.md" INTEGRATION = gotestsum --format github-actions --rerun-fails --jsonfile output.json --packages "./integration/..." -- -parallel 4 -timeout=2h diff --git a/bundle/internal/docs/README.md b/bundle/internal/docs/README.md new file mode 100644 index 0000000000..77d9abcad3 --- /dev/null +++ b/bundle/internal/docs/README.md @@ -0,0 +1,7 @@ +## docs-autogen + +1. Install [Golang](https://go.dev/doc/install) +2. Run `go mod download` from the repo root +3. Run `make docs` from the repo +4. See generated document in `./bundle/internal/docs/docs.md` +5. 
To change descriptions update content in `./bundle/internal/schema/annotations.yml` and re-run `make docs` diff --git a/go.mod b/go.mod index d999344229..33a6bed751 100644 --- a/go.mod +++ b/go.mod @@ -33,6 +33,8 @@ require ( gopkg.in/yaml.v3 v3.0.1 ) +require github.com/nao1215/markdown v0.6.0 + require ( cloud.google.com/go/auth v0.4.2 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect @@ -56,10 +58,8 @@ require ( github.com/karrick/godirwalk v1.17.0 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-runewidth v0.0.9 // indirect - github.com/nao1215/markdown v0.6.0 // indirect github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/radovskyb/watcher v1.0.7 // indirect github.com/stretchr/objx v0.5.2 // indirect github.com/zclconf/go-cty v1.15.0 // indirect go.opencensus.io v0.24.0 // indirect diff --git a/go.sum b/go.sum index e0b58c4f14..a923b6e050 100644 --- a/go.sum +++ b/go.sum @@ -144,8 +144,6 @@ github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8/go.mod h1:HKlIX3XHQyzL github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/radovskyb/watcher v1.0.7 h1:AYePLih6dpmS32vlHfhCeli8127LzkIgwJGcwwe8tUE= -github.com/radovskyb/watcher v1.0.7/go.mod h1:78okwvY5wPdzcb1UYnip1pvrZNIVEIh/Cm+ZuvsUYIg= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs= From 820dd5f34cd1550be10496b2825b8da5630f27b5 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Thu, 2 Jan 2025 16:13:24 +0100 Subject: [PATCH 07/26] feat: Add examples to the docs --- bundle/internal/docs/docs.go | 32 +++++++++++++++++++++++++- bundle/internal/docs/docs.md | 30 ++++++++++++++++++++++++ bundle/internal/docs/main.go | 4 ++++ bundle/internal/schema/annotations.go | 1 + bundle/internal/schema/annotations.yml | 22 ++++++++++++++++-- libs/jsonschema/schema.go | 4 ++++ 6 files changed, 90 insertions(+), 3 deletions(-) diff --git a/bundle/internal/docs/docs.go b/bundle/internal/docs/docs.go index 84ee7bcf56..4948cc9929 100644 --- a/bundle/internal/docs/docs.go +++ b/bundle/internal/docs/docs.go @@ -50,6 +50,7 @@ func getNodes(s jsonschema.Schema, refs map[string]jsonschema.Schema, customFiel Title: k, Description: getDescription(v, item.topLevel), TopLevel: item.topLevel, + Example: getExample(v), } node.Attributes = getAttributes(v.Properties, refs) @@ -80,6 +81,15 @@ func getNodes(s jsonschema.Schema, refs map[string]jsonschema.Schema, customFiel return nodes } +const header = `--- +description: Configuration reference for databricks.yml +--- + +# Configuration reference + +This article provides reference for keys supported by configuration (YAML). See [\_](/dev-tools/bundles/index.md). 
+` + func buildMarkdown(nodes []rootNode, outputFile string) error { f, err := os.Create(outputFile) if err != nil { @@ -88,6 +98,7 @@ func buildMarkdown(nodes []rootNode, outputFile string) error { defer f.Close() m := md.NewMarkdown(f) + m = m.PlainText(header) for _, node := range nodes { m = m.LF() if node.TopLevel { @@ -111,10 +122,16 @@ func buildMarkdown(nodes []rootNode, outputFile string) error { m = m.LF() m = buildAttributeTable(m, node.ArrayItemAttributes) } else if len(node.Attributes) > 0 { - // m = m.H4("Attributes") m = m.LF() m = buildAttributeTable(m, node.Attributes) } + + if node.Example != "" { + m = m.LF() + m = m.H3("Example") + m = m.LF() + m = m.PlainText(node.Example) + } } err = m.Build() @@ -204,6 +221,7 @@ func resolveRefs(s *jsonschema.Schema, schemas map[string]jsonschema.Schema) *js description := s.Description markdownDescription := s.MarkdownDescription + examples := s.Examples for node.Reference != nil { ref := strings.TrimPrefix(*node.Reference, "#/$defs/") @@ -218,12 +236,16 @@ func resolveRefs(s *jsonschema.Schema, schemas map[string]jsonschema.Schema) *js if markdownDescription == "" { markdownDescription = newNode.MarkdownDescription } + if len(examples) == 0 { + examples = newNode.Examples + } node = &newNode } node.Description = description node.MarkdownDescription = markdownDescription + node.Examples = examples return node } @@ -247,3 +269,11 @@ func extractNodes(prefix string, props map[string]*jsonschema.Schema, refs map[s } return nodes } + +func getExample(v *jsonschema.Schema) string { + examples := v.Examples + if len(examples) == 0 { + return "" + } + return examples[0].(string) +} diff --git a/bundle/internal/docs/docs.md b/bundle/internal/docs/docs.md index e3f2124b9d..94f556157f 100644 --- a/bundle/internal/docs/docs.md +++ b/bundle/internal/docs/docs.md @@ -1,3 +1,11 @@ +--- +description: Configuration reference for databricks.yml +--- + +# Configuration reference + +This article provides reference for keys supported by configuration (YAML). See [\_](/dev-tools/bundles/index.md). + ## artifacts @@ -46,6 +54,16 @@ Each item has the following attributes: - The type of the artifact. Valid values are `wheel` or `jar` +### Example + +```yaml +artifacts: + default: + type: whl + build: poetry build + path: . +``` + ## bundle The attributes of the bundle. See [_](/dev-tools/bundles/settings.md#bundle) @@ -246,6 +264,18 @@ Each item of `permissions` has the following attributes: - The name of the user that has the permission set in level. +### Example + +```yaml +permissions: + - level: CAN_VIEW + group_name: test-group + - level: CAN_MANAGE + user_name: someone@example.com + - level: CAN_RUN + service_principal_name: 123456-abcdef +``` + ## presets Defines bundle deployment presets. See [_](/dev-tools/bundles/deployment-modes.md#presets). 
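The `### Example` blocks above are populated from `markdown_examples` entries in `annotations.yml`, so other sections can be extended the same way. A hypothetical snippet for the `presets` section that closes the hunk above; the key names are assumed from the presets attributes and should be checked against the generated table, and the values are purely illustrative:

```yaml
presets:
  name_prefix: "dev_" # illustrative values throughout
  pipelines_development: true
  trigger_pause_status: PAUSED
  jobs_max_concurrent_runs: 4
  tags:
    team: data-platform
```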
diff --git a/bundle/internal/docs/main.go b/bundle/internal/docs/main.go index 328dd7fd15..cc957c8a9e 100644 --- a/bundle/internal/docs/main.go +++ b/bundle/internal/docs/main.go @@ -87,6 +87,7 @@ type annotation struct { Title string `json:"title,omitempty"` Default any `json:"default,omitempty"` Enum []any `json:"enum,omitempty"` + MarkdownExamples string `json:"markdown_examples,omitempty"` } func generateDocs(workdir, outputPath string) error { @@ -157,6 +158,9 @@ func assignAnnotation(s *jsonschema.Schema, a annotation) { if a.MarkdownDescription != "" { s.MarkdownDescription = a.MarkdownDescription } + if a.MarkdownExamples != "" { + s.Examples = []any{a.MarkdownExamples} + } } func LoadAndMergeAnnotations(sources []string) (annotationFile, error) { diff --git a/bundle/internal/schema/annotations.go b/bundle/internal/schema/annotations.go index aec5e68b0e..26eb79dbff 100644 --- a/bundle/internal/schema/annotations.go +++ b/bundle/internal/schema/annotations.go @@ -24,6 +24,7 @@ type annotation struct { Title string `json:"title,omitempty"` Default any `json:"default,omitempty"` Enum []any `json:"enum,omitempty"` + MarkdownExamples string `json:"markdown_examples,omitempty"` } type annotationHandler struct { diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index e52189daa1..0339834c74 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -183,6 +183,14 @@ github.com/databricks/cli/bundle/config.Root: "artifacts": "description": |- Defines the attributes to build an artifact + "markdown_examples": |- + ```yaml + artifacts: + default: + type: whl + build: poetry build + path: . + ``` "bundle": "description": |- The attributes of the bundle. @@ -201,6 +209,16 @@ github.com/databricks/cli/bundle/config.Root: Defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle "markdown_description": |- Defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle. See [_](/dev-tools/bundles/settings.md#permissions) and [_](/dev-tools/bundles/permissions.md). + "markdown_examples": |- + ```yaml + permissions: + - level: CAN_VIEW + group_name: test-group + - level: CAN_MANAGE + user_name: someone@example.com + - level: CAN_RUN + service_principal_name: 123456-abcdef + ``` "presets": "description": |- Defines bundle deployment presets. @@ -445,8 +463,8 @@ github.com/databricks/databricks-sdk-go/service/serving.Ai21LabsConfig: "ai21labs_api_key_plaintext": "description": |- PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/serving.GoogleCloudVertexAiConfig: - "private_key": +? github.com/databricks/databricks-sdk-go/service/serving.GoogleCloudVertexAiConfig +: "private_key": "description": |- PLACEHOLDER "private_key_plaintext": diff --git a/libs/jsonschema/schema.go b/libs/jsonschema/schema.go index e63dde359e..5028bb0d7b 100644 --- a/libs/jsonschema/schema.go +++ b/libs/jsonschema/schema.go @@ -76,6 +76,10 @@ type Schema struct { // Title of the object, rendered as inline documentation in the IDE. // https://json-schema.org/understanding-json-schema/reference/annotations Title string `json:"title,omitempty"` + + // Examples of the value for properties in the schema. + // https://json-schema.org/understanding-json-schema/reference/annotations + Examples []any `json:"examples,omitempty"` } // Default value defined in a JSON Schema, represented as a string. 
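Taken together, the changes above mean that a field documented in `annotations.yml` flows into the generated page through `assignAnnotation` once `make docs` is re-run. A minimal sketch of such an entry, using the quoting style already present in the file; the `variables` field and the link anchor are chosen only for illustration:

```yaml
# illustrative entry; the link anchor is assumed
github.com/databricks/cli/bundle/config.Root:
  "variables":
    "description": |-
      Defines the custom variables for the bundle.
    "markdown_description": |-
      Defines the custom variables for the bundle. See [_](/dev-tools/bundles/settings.md#variables).
```

A `markdown_examples` value would sit alongside these keys as a literal block containing a fenced `yaml` snippet, exactly as the `artifacts` and `permissions` entries above do.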
From 460a4558e07aa481b07acc6f1aa2a9c329592ebe Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Thu, 2 Jan 2025 16:40:06 +0100 Subject: [PATCH 08/26] chore: Extract annotation package --- bundle/internal/annotation/main.go | 56 ++++++++++++++ bundle/internal/docs/main.go | 101 ++----------------------- bundle/internal/schema/annotations.go | 57 +++----------- bundle/internal/schema/annotations.yml | 4 +- bundle/internal/schema/main_test.go | 5 +- bundle/internal/schema/parser.go | 23 +++--- 6 files changed, 88 insertions(+), 158 deletions(-) create mode 100644 bundle/internal/annotation/main.go diff --git a/bundle/internal/annotation/main.go b/bundle/internal/annotation/main.go new file mode 100644 index 0000000000..0053e16c95 --- /dev/null +++ b/bundle/internal/annotation/main.go @@ -0,0 +1,56 @@ +package annotation + +import ( + "bytes" + "os" + + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/cli/libs/dyn/merge" + "github.com/databricks/cli/libs/dyn/yamlloader" +) + +type Descriptor struct { + Description string `json:"description,omitempty"` + MarkdownDescription string `json:"markdown_description,omitempty"` + Title string `json:"title,omitempty"` + Default any `json:"default,omitempty"` + Enum []any `json:"enum,omitempty"` + MarkdownExamples string `json:"markdown_examples,omitempty"` +} + +/** + * Parsed file with annotations, expected format: + * github.com/databricks/cli/bundle/config.Bundle: + * cluster_id: + * description: "Description" + */ +type File map[string]map[string]Descriptor + +func LoadAndMerge(sources []string) (File, error) { + prev := dyn.NilValue + for _, path := range sources { + b, err := os.ReadFile(path) + if err != nil { + return nil, err + } + generated, err := yamlloader.LoadYAML(path, bytes.NewBuffer(b)) + if err != nil { + return nil, err + } + prev, err = merge.Merge(prev, generated) + if err != nil { + return nil, err + } + } + + var data File + + err := convert.ToTyped(&data, prev) + if err != nil { + return nil, err + } + return data, nil +} + +const Placeholder = "PLACEHOLDER" diff --git a/bundle/internal/docs/main.go b/bundle/internal/docs/main.go index cc957c8a9e..f0e8ef2e09 100644 --- a/bundle/internal/docs/main.go +++ b/bundle/internal/docs/main.go @@ -1,7 +1,6 @@ package main import ( - "bytes" "fmt" "log" "os" @@ -10,60 +9,10 @@ import ( "strings" "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/bundle/config/resources" - "github.com/databricks/cli/libs/dyn" - "github.com/databricks/cli/libs/dyn/convert" - "github.com/databricks/cli/libs/dyn/merge" - "github.com/databricks/cli/libs/dyn/yamlloader" + "github.com/databricks/cli/bundle/internal/annotation" "github.com/databricks/cli/libs/jsonschema" - "github.com/databricks/databricks-sdk-go/service/jobs" ) -const Placeholder = "PLACEHOLDER" - -func removeJobsFields(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { - switch typ { - case reflect.TypeOf(resources.Job{}): - // This field has been deprecated in jobs API v2.1 and is always set to - // "MULTI_TASK" in the backend. We should not expose it to the user. - delete(s.Properties, "format") - - // These fields are only meant to be set by the DABs client (ie the CLI) - // and thus should not be exposed to the user. These are used to annotate - // jobs that were created by DABs. 
- delete(s.Properties, "deployment") - delete(s.Properties, "edit_mode") - - case reflect.TypeOf(jobs.GitSource{}): - // These fields are readonly and are not meant to be set by the user. - delete(s.Properties, "job_source") - delete(s.Properties, "git_snapshot") - - default: - // Do nothing - } - - return s -} - -// While volume_type is required in the volume create API, DABs automatically sets -// it's value to "MANAGED" if it's not provided. Thus, we make it optional -// in the bundle schema. -func makeVolumeTypeOptional(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { - if typ != reflect.TypeOf(resources.Volume{}) { - return s - } - - res := []string{} - for _, r := range s.Required { - if r != "volume_type" { - res = append(res, r) - } - } - s.Required = res - return s -} - func main() { if len(os.Args) != 3 { fmt.Println("Usage: go run main.go ") @@ -79,21 +28,10 @@ func main() { } } -type annotationFile map[string]map[string]annotation - -type annotation struct { - Description string `json:"description,omitempty"` - MarkdownDescription string `json:"markdown_description,omitempty"` - Title string `json:"title,omitempty"` - Default any `json:"default,omitempty"` - Enum []any `json:"enum,omitempty"` - MarkdownExamples string `json:"markdown_examples,omitempty"` -} - func generateDocs(workdir, outputPath string) error { annotationsPath := filepath.Join(workdir, "annotations.yml") - annotations, err := LoadAndMergeAnnotations([]string{annotationsPath}) + annotations, err := annotation.LoadAndMerge([]string{annotationsPath}) if err != nil { log.Fatal(err) } @@ -102,9 +40,6 @@ func generateDocs(workdir, outputPath string) error { customFields := map[string]bool{} s, err := jsonschema.FromType(reflect.TypeOf(config.Root{}), []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{ - removeJobsFields, - makeVolumeTypeOptional, - func(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { _, isCustomField := annotations[jsonschema.TypePath(typ)] if isCustomField { @@ -120,7 +55,7 @@ func generateDocs(workdir, outputPath string) error { a := annotations[refPath] if a == nil { - a = map[string]annotation{} + a = map[string]annotation.Descriptor{} } rootTypeAnnotation, ok := a["_"] @@ -151,8 +86,8 @@ func getPath(typ reflect.Type) string { return typ.PkgPath() + "." 
+ typ.Name() } -func assignAnnotation(s *jsonschema.Schema, a annotation) { - if a.Description != "" && a.Description != Placeholder { +func assignAnnotation(s *jsonschema.Schema, a annotation.Descriptor) { + if a.Description != "" && a.Description != annotation.Placeholder { s.Description = a.Description } if a.MarkdownDescription != "" { @@ -162,29 +97,3 @@ func assignAnnotation(s *jsonschema.Schema, a annotation) { s.Examples = []any{a.MarkdownExamples} } } - -func LoadAndMergeAnnotations(sources []string) (annotationFile, error) { - prev := dyn.NilValue - for _, path := range sources { - b, err := os.ReadFile(path) - if err != nil { - return nil, err - } - generated, err := yamlloader.LoadYAML(path, bytes.NewBuffer(b)) - if err != nil { - return nil, err - } - prev, err = merge.Merge(prev, generated) - if err != nil { - return nil, err - } - } - - var data annotationFile - - err := convert.ToTyped(&data, prev) - if err != nil { - return nil, err - } - return data, nil -} diff --git a/bundle/internal/schema/annotations.go b/bundle/internal/schema/annotations.go index 26eb79dbff..d28e544822 100644 --- a/bundle/internal/schema/annotations.go +++ b/bundle/internal/schema/annotations.go @@ -10,6 +10,7 @@ import ( yaml3 "gopkg.in/yaml.v3" + "github.com/databricks/cli/bundle/internal/annotation" "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/dyn/convert" "github.com/databricks/cli/libs/dyn/merge" @@ -18,61 +19,23 @@ import ( "github.com/databricks/cli/libs/jsonschema" ) -type annotation struct { - Description string `json:"description,omitempty"` - MarkdownDescription string `json:"markdown_description,omitempty"` - Title string `json:"title,omitempty"` - Default any `json:"default,omitempty"` - Enum []any `json:"enum,omitempty"` - MarkdownExamples string `json:"markdown_examples,omitempty"` -} - type annotationHandler struct { // Annotations read from all annotation files including all overrides - parsedAnnotations annotationFile + parsedAnnotations annotation.File // Missing annotations for fields that are found in config that need to be added to the annotation file - missingAnnotations annotationFile + missingAnnotations annotation.File } -/** - * Parsed file with annotations, expected format: - * github.com/databricks/cli/bundle/config.Bundle: - * cluster_id: - * description: "Description" - */ -type annotationFile map[string]map[string]annotation - -const Placeholder = "PLACEHOLDER" - // Adds annotations to the JSON schema reading from the annotation files. 
// More details https://json-schema.org/understanding-json-schema/reference/annotations func newAnnotationHandler(sources []string) (*annotationHandler, error) { - prev := dyn.NilValue - for _, path := range sources { - b, err := os.ReadFile(path) - if err != nil { - return nil, err - } - generated, err := yamlloader.LoadYAML(path, bytes.NewBuffer(b)) - if err != nil { - return nil, err - } - prev, err = merge.Merge(prev, generated) - if err != nil { - return nil, err - } - } - - var data annotationFile - - err := convert.ToTyped(&data, prev) + data, err := annotation.LoadAndMerge(sources) if err != nil { return nil, err } - d := &annotationHandler{} d.parsedAnnotations = data - d.missingAnnotations = annotationFile{} + d.missingAnnotations = annotation.File{} return d, nil } @@ -85,7 +48,7 @@ func (d *annotationHandler) addAnnotations(typ reflect.Type, s jsonschema.Schema annotations := d.parsedAnnotations[refPath] if annotations == nil { - annotations = map[string]annotation{} + annotations = map[string]annotation.Descriptor{} } rootTypeAnnotation, ok := annotations[RootTypeKey] @@ -96,11 +59,11 @@ func (d *annotationHandler) addAnnotations(typ reflect.Type, s jsonschema.Schema for k, v := range s.Properties { item := annotations[k] if item.Description == "" { - item.Description = Placeholder + item.Description = annotation.Placeholder emptyAnnotations := d.missingAnnotations[refPath] if emptyAnnotations == nil { - emptyAnnotations = map[string]annotation{} + emptyAnnotations = map[string]annotation.Descriptor{} d.missingAnnotations[refPath] = emptyAnnotations } emptyAnnotations[k] = item @@ -141,8 +104,8 @@ func getPath(typ reflect.Type) string { return typ.PkgPath() + "." + typ.Name() } -func assignAnnotation(s *jsonschema.Schema, a annotation) { - if a.Description != Placeholder { +func assignAnnotation(s *jsonschema.Schema, a annotation.Descriptor) { + if a.Description != annotation.Placeholder { s.Description = a.Description } diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index 0339834c74..b66f989437 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -463,8 +463,8 @@ github.com/databricks/databricks-sdk-go/service/serving.Ai21LabsConfig: "ai21labs_api_key_plaintext": "description": |- PLACEHOLDER -? 
github.com/databricks/databricks-sdk-go/service/serving.GoogleCloudVertexAiConfig -: "private_key": +github.com/databricks/databricks-sdk-go/service/serving.GoogleCloudVertexAiConfig: + "private_key": "description": |- PLACEHOLDER "private_key_plaintext": diff --git a/bundle/internal/schema/main_test.go b/bundle/internal/schema/main_test.go index 607347b6b1..902fd32c94 100644 --- a/bundle/internal/schema/main_test.go +++ b/bundle/internal/schema/main_test.go @@ -11,6 +11,7 @@ import ( "testing" "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/internal/annotation" "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/dyn/merge" "github.com/databricks/cli/libs/dyn/yamlloader" @@ -113,13 +114,13 @@ func TestNoDetachedAnnotations(t *testing.T) { assert.Empty(t, types, "Detached annotations found, regenerate schema and check for package path changes") } -func getAnnotations(path string) (annotationFile, error) { +func getAnnotations(path string) (annotation.File, error) { b, err := os.ReadFile(path) if err != nil { return nil, err } - var data annotationFile + var data annotation.File err = yaml.Unmarshal(b, &data) return data, err } diff --git a/bundle/internal/schema/parser.go b/bundle/internal/schema/parser.go index 3fbec05286..dd55a9e81d 100644 --- a/bundle/internal/schema/parser.go +++ b/bundle/internal/schema/parser.go @@ -9,6 +9,7 @@ import ( "reflect" "strings" + "github.com/databricks/cli/bundle/internal/annotation" "github.com/databricks/cli/libs/dyn/yamlloader" "github.com/databricks/cli/libs/jsonschema" "gopkg.in/yaml.v3" @@ -95,8 +96,8 @@ func (p *openapiParser) findRef(typ reflect.Type) (jsonschema.Schema, bool) { // Use the OpenAPI spec to load descriptions for the given type. func (p *openapiParser) extractAnnotations(typ reflect.Type, outputPath, overridesPath string) error { - annotations := annotationFile{} - overrides := annotationFile{} + annotations := annotation.File{} + overrides := annotation.File{} b, err := os.ReadFile(overridesPath) if err != nil { @@ -107,7 +108,7 @@ func (p *openapiParser) extractAnnotations(typ reflect.Type, outputPath, overrid return err } if overrides == nil { - overrides = annotationFile{} + overrides = annotation.File{} } _, err = jsonschema.FromType(typ, []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{ @@ -118,16 +119,16 @@ func (p *openapiParser) extractAnnotations(typ reflect.Type, outputPath, overrid } basePath := getPath(typ) - pkg := map[string]annotation{} + pkg := map[string]annotation.Descriptor{} annotations[basePath] = pkg if ref.Description != "" || ref.Enum != nil { - pkg[RootTypeKey] = annotation{Description: ref.Description, Enum: ref.Enum} + pkg[RootTypeKey] = annotation.Descriptor{Description: ref.Description, Enum: ref.Enum} } for k := range s.Properties { if refProp, ok := ref.Properties[k]; ok { - pkg[k] = annotation{Description: refProp.Description, Enum: refProp.Enum} + pkg[k] = annotation.Descriptor{Description: refProp.Description, Enum: refProp.Enum} if refProp.Description == "" { addEmptyOverride(k, basePath, overrides) } @@ -167,22 +168,22 @@ func (p *openapiParser) extractAnnotations(typ reflect.Type, outputPath, overrid return nil } -func addEmptyOverride(key, pkg string, overridesFile annotationFile) { +func addEmptyOverride(key, pkg string, overridesFile annotation.File) { if overridesFile[pkg] == nil { - overridesFile[pkg] = map[string]annotation{} + overridesFile[pkg] = map[string]annotation.Descriptor{} } overrides := overridesFile[pkg] if 
overrides[key].Description == "" { - overrides[key] = annotation{Description: Placeholder} + overrides[key] = annotation.Descriptor{Description: annotation.Placeholder} } a, ok := overrides[key] if !ok { - a = annotation{} + a = annotation.Descriptor{} } if a.Description == "" { - a.Description = Placeholder + a.Description = annotation.Placeholder } overrides[key] = a } From 541c3e3fb5773f123082af50e0ff68be881bba50 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Thu, 2 Jan 2025 16:56:36 +0100 Subject: [PATCH 09/26] feat: More explicit type for arrays --- bundle/internal/docs/docs.go | 10 +++++++++- bundle/internal/docs/docs.md | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/bundle/internal/docs/docs.go b/bundle/internal/docs/docs.go index 4948cc9929..57d42ce14d 100644 --- a/bundle/internal/docs/docs.go +++ b/bundle/internal/docs/docs.go @@ -100,6 +100,8 @@ func buildMarkdown(nodes []rootNode, outputFile string) error { m := md.NewMarkdown(f) m = m.PlainText(header) for _, node := range nodes { + isArray := len(node.ArrayItemAttributes) > 0 + m = m.LF() if node.TopLevel { m = m.H2(node.Title) @@ -107,7 +109,13 @@ func buildMarkdown(nodes []rootNode, outputFile string) error { m = m.H3(node.Title) } m = m.LF() - m = m.PlainText(node.Description) + if isArray { + m = m.PlainText("**`Type: Array`**") + m = m.LF() + m = m.PlainText(node.Description) + } else { + m = m.PlainText(node.Description) + } m = m.LF() if len(node.ObjectKeyAttributes) > 0 { diff --git a/bundle/internal/docs/docs.md b/bundle/internal/docs/docs.md index 94f556157f..04b1e83d3a 100644 --- a/bundle/internal/docs/docs.md +++ b/bundle/internal/docs/docs.md @@ -235,6 +235,8 @@ Specifies a list of path globs that contain configuration files to include withi ## permissions +**`Type: Array`** + Defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle. See [_](/dev-tools/bundles/settings.md#permissions) and [_](/dev-tools/bundles/permissions.md). Each item of `permissions` has the following attributes: From 2aadfcbcb220c56a99dc4e7ac3ccf3557ad4dbcb Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Thu, 2 Jan 2025 18:24:18 +0100 Subject: [PATCH 10/26] feat: Support for resources --- Makefile | 2 +- bundle/internal/docs/.gitignore | 1 + bundle/internal/docs/README.md | 68 ++- bundle/internal/docs/docs.go | 13 +- bundle/internal/docs/docs.md | 1019 ------------------------------- bundle/internal/docs/main.go | 63 +- 6 files changed, 123 insertions(+), 1043 deletions(-) create mode 100644 bundle/internal/docs/.gitignore delete mode 100644 bundle/internal/docs/docs.md diff --git a/Makefile b/Makefile index 2db0ba0bd0..b309e2908a 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,7 @@ schema: docs: @echo "✓ Generating docs using ./bundle/internal/schema/annotations.yml file..." - @go run ./bundle/internal/docs ./bundle/internal/schema ./bundle/internal/docs/docs.md + @go run ./bundle/internal/docs ./bundle/internal/schema ./bundle/internal/docs @echo "✓ Writing docs to ./bundle/internal/docs/docs.md" INTEGRATION = gotestsum --format github-actions --rerun-fails --jsonfile output.json --packages "./integration/..." 
-- -parallel 4 -timeout=2h diff --git a/bundle/internal/docs/.gitignore b/bundle/internal/docs/.gitignore new file mode 100644 index 0000000000..294bc037fc --- /dev/null +++ b/bundle/internal/docs/.gitignore @@ -0,0 +1 @@ +output/**/* diff --git a/bundle/internal/docs/README.md b/bundle/internal/docs/README.md index 77d9abcad3..0dd5727f43 100644 --- a/bundle/internal/docs/README.md +++ b/bundle/internal/docs/README.md @@ -3,5 +3,69 @@ 1. Install [Golang](https://go.dev/doc/install) 2. Run `go mod download` from the repo root 3. Run `make docs` from the repo -4. See generated document in `./bundle/internal/docs/docs.md` -5. To change descriptions update content in `./bundle/internal/schema/annotations.yml` and re-run `make docs` +4. See generated documents in `./bundle/internal/docs/output` directory +5. To change descriptions update content in `./bundle/internal/schema/annotations.yml` or `./bundle/internal/schema/annotations_openapi_overrides.yml` and re-run `make docs` + +For simpler usage run it together with copy command to move resulting files to local `docs` repo. Note that it will overwrite any local changes in affected files. Example: + +``` +make docs && cp bundle/internal/docs/output/*.md ../docs/source/dev-tools/bundles +``` + +To change file names or file headers update them in `main.go` file in this directory + +### Annotation file structure + +```yaml +"": + "": + description: Description of the property, only plain text is supported + markdown_description: Description with markdown support, if defined it will override the value in docs and in JSON-schema + markdown_examples: Custom block for any example, in free form, Markdown is supported + title: JSON-schema title, not used in docs + default: Default value of the property, not used in docs + enum: Possible values of enum-type, not used in docs +``` + +Descriptions with `PLACEHOLDER` value are not displayed in docs and JSON-schema + +All relative links like `[_](/dev-tools/bundles/settings.md#cluster_id)` are kept as is in docs but converted to absolute links in JSON schema + +### Example annotation + +```yaml +github.com/databricks/cli/bundle/config.Bundle: + "cluster_id": + "description": |- + The ID of a cluster to use to run the bundle. + "markdown_description": |- + The ID of a cluster to use to run the bundle. See [_](/dev-tools/bundles/settings.md#cluster_id). + "compute_id": + "description": |- + PLACEHOLDER + "databricks_cli_version": + "description": |- + The Databricks CLI version to use for the bundle. + "markdown_description": |- + The Databricks CLI version to use for the bundle. See [_](/dev-tools/bundles/settings.md#databricks_cli_version). + "deployment": + "description": |- + The definition of the bundle deployment + "markdown_description": |- + The definition of the bundle deployment. For supported attributes, see [_](#deployment) and [_](/dev-tools/bundles/deployment-modes.md). + "git": + "description": |- + The Git version control details that are associated with your bundle. + "markdown_description": |- + The Git version control details that are associated with your bundle. For supported attributes, see [_](#git) and [_](/dev-tools/bundles/settings.md#git). + "name": + "description": |- + The name of the bundle. 
+ "uuid": + "description": |- + PLACEHOLDER +``` + +### TODO + +Add file watcher to track changes in the annotation files and re-run `make docs` script automtically diff --git a/bundle/internal/docs/docs.go b/bundle/internal/docs/docs.go index 57d42ce14d..dd93774875 100644 --- a/bundle/internal/docs/docs.go +++ b/bundle/internal/docs/docs.go @@ -81,16 +81,7 @@ func getNodes(s jsonschema.Schema, refs map[string]jsonschema.Schema, customFiel return nodes } -const header = `--- -description: Configuration reference for databricks.yml ---- - -# Configuration reference - -This article provides reference for keys supported by configuration (YAML). See [\_](/dev-tools/bundles/index.md). -` - -func buildMarkdown(nodes []rootNode, outputFile string) error { +func buildMarkdown(nodes []rootNode, outputFile, header string) error { f, err := os.Create(outputFile) if err != nil { log.Fatal(err) @@ -136,7 +127,7 @@ func buildMarkdown(nodes []rootNode, outputFile string) error { if node.Example != "" { m = m.LF() - m = m.H3("Example") + m = m.PlainText("**Example**") m = m.LF() m = m.PlainText(node.Example) } diff --git a/bundle/internal/docs/docs.md b/bundle/internal/docs/docs.md deleted file mode 100644 index 04b1e83d3a..0000000000 --- a/bundle/internal/docs/docs.md +++ /dev/null @@ -1,1019 +0,0 @@ ---- -description: Configuration reference for databricks.yml ---- - -# Configuration reference - -This article provides reference for keys supported by configuration (YAML). See [\_](/dev-tools/bundles/index.md). - - -## artifacts - -Defines the attributes to build an artifact - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `artifacts` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - build - - String - - An optional set of non-default build commands that you want to run locally before deployment. For Python wheel builds, the Databricks CLI assumes that it can find a local install of the Python wheel package to run builds, and it runs the command python setup.py bdist_wheel by default during each bundle deployment. To specify multiple build commands, separate each command with double-ampersand (&&) characters. - - * - executable - - String - - The executable type. - - * - files - - Sequence - - The source files for the artifact, defined as an [_](#artifact_file). - - * - path - - String - - The location where the built artifact will be saved. - - * - type - - String - - The type of the artifact. Valid values are `wheel` or `jar` - - -### Example - -```yaml -artifacts: - default: - type: whl - build: poetry build - path: . -``` - -## bundle - -The attributes of the bundle. See [_](/dev-tools/bundles/settings.md#bundle) - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - cluster_id - - String - - The ID of a cluster to use to run the bundle. See [_](/dev-tools/bundles/settings.md#cluster_id). - - * - compute_id - - String - - - - * - databricks_cli_version - - String - - The Databricks CLI version to use for the bundle. See [_](/dev-tools/bundles/settings.md#databricks_cli_version). - - * - deployment - - Map - - The definition of the bundle deployment. For supported attributes, see [_](#deployment) and [_](/dev-tools/bundles/deployment-modes.md). - - * - git - - Map - - The Git version control details that are associated with your bundle. For supported attributes, see [_](#git) and [_](/dev-tools/bundles/settings.md#git). 
- - * - name - - String - - The name of the bundle. - - * - uuid - - String - - - - -### bundle.deployment - -The definition of the bundle deployment - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - fail_on_active_runs - - Boolean - - Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. - - * - lock - - Map - - The deployment lock attributes. See [_](#lock). - - -### bundle.deployment.lock - -The deployment lock attributes. - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - enabled - - Boolean - - Whether this lock is enabled. - - * - force - - Boolean - - Whether to force this lock if it is enabled. - - -### bundle.git - -The Git version control details that are associated with your bundle. - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - branch - - String - - The Git branch name. See [_](/dev-tools/bundles/settings.md#git). - - * - origin_url - - String - - The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git). - - -## experimental - -Defines attributes for experimental features. - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - pydabs - - Map - - The PyDABs configuration. - - * - python_wheel_wrapper - - Boolean - - Whether to use a Python wheel wrapper - - * - scripts - - Map - - The commands to run - - * - use_legacy_run_as - - Boolean - - Whether to use the legacy run_as behavior - - -### experimental.pydabs - -The PyDABs configuration. - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - enabled - - Boolean - - Whether or not PyDABs (Private Preview) is enabled - - * - import - - Sequence - - The PyDABs project to import to discover resources, resource generator and mutators - - * - venv_path - - String - - The Python virtual environment path - - -## include - -Specifies a list of path globs that contain configuration files to include within the bundle. See [_](/dev-tools/bundles/settings.md#include) - - -## permissions - -**`Type: Array`** - -Defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle. See [_](/dev-tools/bundles/settings.md#permissions) and [_](/dev-tools/bundles/permissions.md). - -Each item of `permissions` has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - group_name - - String - - The name of the group that has the permission set in level. - - * - level - - String - - The allowed permission for user, group, service principal defined for this permission. - - * - service_principal_name - - String - - The name of the service principal that has the permission set in level. - - * - user_name - - String - - The name of the user that has the permission set in level. - - -### Example - -```yaml -permissions: - - level: CAN_VIEW - group_name: test-group - - level: CAN_MANAGE - user_name: someone@example.com - - level: CAN_RUN - service_principal_name: 123456-abcdef -``` - -## presets - -Defines bundle deployment presets. See [_](/dev-tools/bundles/deployment-modes.md#presets). - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - jobs_max_concurrent_runs - - Integer - - The maximum concurrent runs for a job. - - * - name_prefix - - String - - The prefix for job runs of the bundle. 
- - * - pipelines_development - - Boolean - - Whether pipeline deployments should be locked in development mode. - - * - source_linked_deployment - - Boolean - - Whether to link the deployment to the bundle source. - - * - tags - - Map - - The tags for the bundle deployment. - - * - trigger_pause_status - - String - - A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED. - - -## resources - -Specifies information about the Databricks resources used by the bundle. See [_](/dev-tools/bundles/resources.md). - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - clusters - - Map - - The cluster definitions for the bundle. See [_](/dev-tools/bundles/resources.md#cluster) - - * - dashboards - - Map - - The dashboard definitions for the bundle. See [_](/dev-tools/bundles/resources.md#dashboard) - - * - experiments - - Map - - The experiment definitions for the bundle. See [_](/dev-tools/bundles/resources.md#experiment) - - * - jobs - - Map - - The job definitions for the bundle. See [_](/dev-tools/bundles/resources.md#job) - - * - model_serving_endpoints - - Map - - The model serving endpoint definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model_serving_endpoint) - - * - models - - Map - - The model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model) - - * - pipelines - - Map - - The pipeline definitions for the bundle. See [_](/dev-tools/bundles/resources.md#pipeline) - - * - quality_monitors - - Map - - The quality monitor definitions for the bundle. See [_](/dev-tools/bundles/resources.md#quality_monitor) - - * - registered_models - - Map - - The registered model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#registered_model) - - * - schemas - - Map - - The schema definitions for the bundle. See [_](/dev-tools/bundles/resources.md#schema) - - * - volumes - - Map - - - - -## run_as - -The identity to use to run the bundle. - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - service_principal_name - - String - - - - * - user_name - - String - - - - -## sync - -The files and file paths to include or exclude in the bundle. See [_](/dev-tools/bundles/) - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - exclude - - Sequence - - A list of files or folders to exclude from the bundle. - - * - include - - Sequence - - A list of files or folders to include in the bundle. - - * - paths - - Sequence - - The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed. - - -## targets - -Defines deployment targets for the bundle. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `targets` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - artifacts - - Map - - The artifacts to include in the target deployment. See [_](#artifact) - - * - bundle - - Map - - The name of the bundle when deploying to this target. - - * - cluster_id - - String - - The ID of the cluster to use for this target. - - * - compute_id - - String - - Deprecated. The ID of the compute to use for this target. - - * - default - - Boolean - - Whether this target is the default target. - - * - git - - Map - - The Git version control settings for the target. See [_](#git). - - * - mode - - String - - The deployment mode for the target. 
Valid values are `development` or `production`. See [_](/dev-tools/bundles/deployment-modes.md). - - * - permissions - - Sequence - - The permissions for deploying and running the bundle in the target. See [_](#permission). - - * - presets - - Map - - The deployment presets for the target. See [_](#preset). - - * - resources - - Map - - The resource definitions for the target. See [_](#resources). - - * - run_as - - Map - - The identity to use to run the bundle. See [_](#job_run_as) and [_](/dev-tools/bundles/run_as.md). - - * - sync - - Map - - The local paths to sync to the target workspace when a bundle is run or deployed. See [_](#sync). - - * - variables - - Map - - The custom variable definitions for the target. See [_](/dev-tools/bundles/settings.md#variables) and [_](/dev-tools/bundles/variables.md). - - * - workspace - - Map - - The Databricks workspace for the target. [_](#workspace) - - -### targets.bundle - -The name of the bundle when deploying to this target. - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - cluster_id - - String - - The ID of a cluster to use to run the bundle. See [_](/dev-tools/bundles/settings.md#cluster_id). - - * - compute_id - - String - - - - * - databricks_cli_version - - String - - The Databricks CLI version to use for the bundle. See [_](/dev-tools/bundles/settings.md#databricks_cli_version). - - * - deployment - - Map - - The definition of the bundle deployment. For supported attributes, see [_](#deployment) and [_](/dev-tools/bundles/deployment-modes.md). - - * - git - - Map - - The Git version control details that are associated with your bundle. For supported attributes, see [_](#git) and [_](/dev-tools/bundles/settings.md#git). - - * - name - - String - - The name of the bundle. - - * - uuid - - String - - - - -### targets.bundle.deployment - -The definition of the bundle deployment - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - fail_on_active_runs - - Boolean - - Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. - - * - lock - - Map - - The deployment lock attributes. See [_](#lock). - - -### targets.bundle.deployment.lock - -The deployment lock attributes. - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - enabled - - Boolean - - Whether this lock is enabled. - - * - force - - Boolean - - Whether to force this lock if it is enabled. - - -### targets.bundle.git - -The Git version control details that are associated with your bundle. - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - branch - - String - - The Git branch name. See [_](/dev-tools/bundles/settings.md#git). - - * - origin_url - - String - - The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git). - - -### targets.git - -The Git version control settings for the target. - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - branch - - String - - The Git branch name. See [_](/dev-tools/bundles/settings.md#git). - - * - origin_url - - String - - The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git). - - -### targets.presets - -The deployment presets for the target. - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - jobs_max_concurrent_runs - - Integer - - The maximum concurrent runs for a job. - - * - name_prefix - - String - - The prefix for job runs of the bundle. 
- - * - pipelines_development - - Boolean - - Whether pipeline deployments should be locked in development mode. - - * - source_linked_deployment - - Boolean - - Whether to link the deployment to the bundle source. - - * - tags - - Map - - The tags for the bundle deployment. - - * - trigger_pause_status - - String - - A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED. - - -### targets.resources - -The resource definitions for the target. - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - clusters - - Map - - The cluster definitions for the bundle. See [_](/dev-tools/bundles/resources.md#cluster) - - * - dashboards - - Map - - The dashboard definitions for the bundle. See [_](/dev-tools/bundles/resources.md#dashboard) - - * - experiments - - Map - - The experiment definitions for the bundle. See [_](/dev-tools/bundles/resources.md#experiment) - - * - jobs - - Map - - The job definitions for the bundle. See [_](/dev-tools/bundles/resources.md#job) - - * - model_serving_endpoints - - Map - - The model serving endpoint definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model_serving_endpoint) - - * - models - - Map - - The model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model) - - * - pipelines - - Map - - The pipeline definitions for the bundle. See [_](/dev-tools/bundles/resources.md#pipeline) - - * - quality_monitors - - Map - - The quality monitor definitions for the bundle. See [_](/dev-tools/bundles/resources.md#quality_monitor) - - * - registered_models - - Map - - The registered model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#registered_model) - - * - schemas - - Map - - The schema definitions for the bundle. See [_](/dev-tools/bundles/resources.md#schema) - - * - volumes - - Map - - - - -### targets.sync - -The local paths to sync to the target workspace when a bundle is run or deployed. - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - exclude - - Sequence - - A list of files or folders to exclude from the bundle. - - * - include - - Sequence - - A list of files or folders to include in the bundle. - - * - paths - - Sequence - - The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed. - - -### targets.workspace - -The Databricks workspace for the target. - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - artifact_path - - String - - The artifact path to use within the workspace for both deployments and workflow runs - - * - auth_type - - String - - The authentication type. 
- - * - azure_client_id - - String - - The Azure client ID - - * - azure_environment - - String - - The Azure environment - - * - azure_login_app_id - - String - - The Azure login app ID - - * - azure_tenant_id - - String - - The Azure tenant ID - - * - azure_use_msi - - Boolean - - Whether to use MSI for Azure - - * - azure_workspace_resource_id - - String - - The Azure workspace resource ID - - * - client_id - - String - - The client ID for the workspace - - * - file_path - - String - - The file path to use within the workspace for both deployments and workflow runs - - * - google_service_account - - String - - The Google service account name - - * - host - - String - - The Databricks workspace host URL - - * - profile - - String - - The Databricks workspace profile name - - * - resource_path - - String - - The workspace resource path - - * - root_path - - String - - The Databricks workspace root path - - * - state_path - - String - - The workspace state path - - -## variables - -A Map that defines the custom variables for the bundle, where each key is the name of the variable, and the value is a Map that defines the variable. - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - - - Map - - Item of the `variables` map - -Each item has the following attributes: - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - default - - Any - - - - * - description - - String - - The description of the variable - - * - lookup - - Map - - The name of the `alert`, `cluster_policy`, `cluster`, `dashboard`, `instance_pool`, `job`, `metastore`, `pipeline`, `query`, `service_principal`, or `warehouse` object for which to retrieve an ID." - - * - type - - String - - The type of the variable. - - -### variables.lookup - -The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID. - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - alert - - String - - - - * - cluster - - String - - - - * - cluster_policy - - String - - - - * - dashboard - - String - - - - * - instance_pool - - String - - - - * - job - - String - - - - * - metastore - - String - - - - * - notification_destination - - String - - - - * - pipeline - - String - - - - * - query - - String - - - - * - service_principal - - String - - - - * - warehouse - - String - - - - -## workspace - -Defines the Databricks workspace for the bundle. - - - -.. list-table:: - :header-rows: 1 - - * - Key - - Type - - Description - - * - artifact_path - - String - - The artifact path to use within the workspace for both deployments and workflow runs - - * - auth_type - - String - - The authentication type. 
- - * - azure_client_id - - String - - The Azure client ID - - * - azure_environment - - String - - The Azure environment - - * - azure_login_app_id - - String - - The Azure login app ID - - * - azure_tenant_id - - String - - The Azure tenant ID - - * - azure_use_msi - - Boolean - - Whether to use MSI for Azure - - * - azure_workspace_resource_id - - String - - The Azure workspace resource ID - - * - client_id - - String - - The client ID for the workspace - - * - file_path - - String - - The file path to use within the workspace for both deployments and workflow runs - - * - google_service_account - - String - - The Google service account name - - * - host - - String - - The Databricks workspace host URL - - * - profile - - String - - The Databricks workspace profile name - - * - resource_path - - String - - The workspace resource path - - * - root_path - - String - - The Databricks workspace root path - - * - state_path - - String - - The workspace state path - \ No newline at end of file diff --git a/bundle/internal/docs/main.go b/bundle/internal/docs/main.go index f0e8ef2e09..beb4954c8b 100644 --- a/bundle/internal/docs/main.go +++ b/bundle/internal/docs/main.go @@ -4,7 +4,7 @@ import ( "fmt" "log" "os" - "path/filepath" + "path" "reflect" "strings" @@ -13,25 +13,68 @@ import ( "github.com/databricks/cli/libs/jsonschema" ) +const ( + rootFileName = "reference.md" + rootHeader = `--- +description: Configuration reference for databricks.yml +--- + +# Configuration reference + +This article provides reference for keys supported by configuration (YAML). See [\_](/dev-tools/bundles/index.md). +` +) + +const ( + resourcesFileName = "resources-reference.md" + resourcesHeader = `--- +description: Resources references for databricks.yml +--- + +# Resources reference + +This article provides reference for keys supported by configuration (YAML). See [\_](/dev-tools/bundles/index.md). 
+` +) + func main() { if len(os.Args) != 3 { fmt.Println("Usage: go run main.go ") os.Exit(1) } - annotationFile := os.Args[1] - outputFile := os.Args[2] + annotationDir := os.Args[1] + docsDir := os.Args[2] + outputDir := path.Join(docsDir, "output") + + if _, err := os.Stat(outputDir); os.IsNotExist(err) { + if err := os.MkdirAll(outputDir, 0o755); err != nil { + log.Fatal(err) + } + } - err := generateDocs(annotationFile, outputFile) + err := generateDocs( + []string{path.Join(annotationDir, "annotations.yml")}, + path.Join(outputDir, rootFileName), + reflect.TypeOf(config.Root{}), + rootHeader, + ) + if err != nil { + log.Fatal(err) + } + err = generateDocs( + []string{path.Join(annotationDir, "annotations_openapi.yml"), path.Join(annotationDir, "annotations_openapi_overrides.yml")}, + path.Join(outputDir, resourcesFileName), + reflect.TypeOf(config.Resources{}), + resourcesHeader, + ) if err != nil { log.Fatal(err) } } -func generateDocs(workdir, outputPath string) error { - annotationsPath := filepath.Join(workdir, "annotations.yml") - - annotations, err := annotation.LoadAndMerge([]string{annotationsPath}) +func generateDocs(inputPaths []string, outputPath string, rootType reflect.Type, header string) error { + annotations, err := annotation.LoadAndMerge(inputPaths) if err != nil { log.Fatal(err) } @@ -39,7 +82,7 @@ func generateDocs(workdir, outputPath string) error { schemas := map[string]jsonschema.Schema{} customFields := map[string]bool{} - s, err := jsonschema.FromType(reflect.TypeOf(config.Root{}), []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{ + s, err := jsonschema.FromType(rootType, []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{ func(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { _, isCustomField := annotations[jsonschema.TypePath(typ)] if isCustomField { @@ -75,7 +118,7 @@ func generateDocs(workdir, outputPath string) error { } nodes := getNodes(s, schemas, customFields) - err = buildMarkdown(nodes, outputPath) + err = buildMarkdown(nodes, outputPath, header) if err != nil { log.Fatal(err) } From c6703c13637c0d85e6754e4c802f38052304a5d6 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Fri, 3 Jan 2025 13:52:59 +0100 Subject: [PATCH 11/26] fix: Updated styles --- bundle/internal/docs/docs.go | 57 ++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/bundle/internal/docs/docs.go b/bundle/internal/docs/docs.go index dd93774875..cae0b7fbd4 100644 --- a/bundle/internal/docs/docs.go +++ b/bundle/internal/docs/docs.go @@ -20,6 +20,7 @@ type rootNode struct { ObjectKeyAttributes []attributeNode ArrayItemAttributes []attributeNode TopLevel bool + Type string } type attributeNode struct { @@ -51,6 +52,7 @@ func getNodes(s jsonschema.Schema, refs map[string]jsonschema.Schema, customFiel Description: getDescription(v, item.topLevel), TopLevel: item.topLevel, Example: getExample(v), + Type: getHumanReadableType(v.Type), } node.Attributes = getAttributes(v.Properties, refs) @@ -91,8 +93,6 @@ func buildMarkdown(nodes []rootNode, outputFile, header string) error { m := md.NewMarkdown(f) m = m.PlainText(header) for _, node := range nodes { - isArray := len(node.ArrayItemAttributes) > 0 - m = m.LF() if node.TopLevel { m = m.H2(node.Title) @@ -100,24 +100,24 @@ func buildMarkdown(nodes []rootNode, outputFile, header string) error { m = m.H3(node.Title) } m = m.LF() - if isArray { - m = m.PlainText("**`Type: Array`**") + + if node.Type != "" { + m = m.PlainText(fmt.Sprintf("**`Type: %s`**", node.Type)) m = 
m.LF() - m = m.PlainText(node.Description) - } else { - m = m.PlainText(node.Description) } + m = m.PlainText(node.Description) m = m.LF() if len(node.ObjectKeyAttributes) > 0 { + itemName := removePluralForm(node.Title) + fieldName := fmt.Sprintf("%s-name", itemName) m = buildAttributeTable(m, []attributeNode{ - {Title: fmt.Sprintf("<%s-entry-name>", node.Title), Type: "Map", Description: fmt.Sprintf("Item of the `%s` map", node.Title)}, + {Title: fieldName, Type: "Map", Description: fmt.Sprintf("The definition of a %s. See %s", itemName, md.Link("_", "#"+fieldName))}, }) - m = m.PlainText("Each item has the following attributes:") m = m.LF() + m = m.H3(fieldName) m = buildAttributeTable(m, node.ObjectKeyAttributes) } else if len(node.ArrayItemAttributes) > 0 { - m = m.PlainTextf("Each item of `%s` has the following attributes:", node.Title) m = m.LF() m = buildAttributeTable(m, node.ArrayItemAttributes) } else if len(node.Attributes) > 0 { @@ -141,18 +141,28 @@ func buildMarkdown(nodes []rootNode, outputFile, header string) error { return nil } +func removePluralForm(s string) string { + if strings.HasSuffix(s, "s") { + return strings.TrimSuffix(s, "s") + } + return s +} + func buildAttributeTable(m *md.Markdown, attributes []attributeNode) *md.Markdown { return buildCustomAttributeTable(m, attributes) - rows := [][]string{} - for _, n := range attributes { - rows = append(rows, []string{fmt.Sprintf("`%s`", n.Title), n.Type, formatDescription(n.Description)}) - } - m = m.CustomTable(md.TableSet{ - Header: []string{"Key", "Type", "Description"}, - Rows: rows, - }, md.TableOptions{AutoWrapText: false, AutoFormatHeaders: false}) - return m + // Rows below are useful for debugging since it renders the table in a regular markdown format + + // rows := [][]string{} + // for _, n := range attributes { + // rows = append(rows, []string{fmt.Sprintf("`%s`", n.Title), n.Type, formatDescription(n.Description)}) + // } + // m = m.CustomTable(md.TableSet{ + // Header: []string{"Key", "Type", "Description"}, + // Rows: rows, + // }, md.TableOptions{AutoWrapText: false, AutoFormatHeaders: false}) + + // return m } func formatDescription(s string) string { @@ -172,7 +182,7 @@ func buildCustomAttributeTable(m *md.Markdown, attributes []attributeNode) *md.M m = m.LF() for _, a := range attributes { - m = m.PlainText(" * - " + a.Title) + m = m.PlainText(" * - " + fmt.Sprintf("`%s`", a.Title)) m = m.PlainText(" - " + a.Type) m = m.PlainText(" - " + formatDescription(a.Description)) m = m.LF() @@ -180,7 +190,7 @@ func buildCustomAttributeTable(m *md.Markdown, attributes []attributeNode) *md.M return m } -func getAttributes(props map[string]*jsonschema.Schema, refs map[string]jsonschema.Schema) []attributeNode { +func getHumanReadableType(t jsonschema.Type) string { typesMapping := map[string]string{ "string": "String", "integer": "Integer", @@ -188,11 +198,14 @@ func getAttributes(props map[string]*jsonschema.Schema, refs map[string]jsonsche "array": "Sequence", "object": "Map", } + return typesMapping[string(t)] +} +func getAttributes(props map[string]*jsonschema.Schema, refs map[string]jsonschema.Schema) []attributeNode { attributes := []attributeNode{} for k, v := range props { v = resolveRefs(v, refs) - typeString := typesMapping[string(v.Type)] + typeString := getHumanReadableType(v.Type) if typeString == "" { typeString = "Any" } From fe6ba76b7d16c29946718b31187fe0aab8d09fb8 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Tue, 7 Jan 2025 16:52:01 +0100 Subject: [PATCH 12/26] fix: Styling --- 
bundle/internal/docs/docs.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/bundle/internal/docs/docs.go b/bundle/internal/docs/docs.go index cae0b7fbd4..35bb138871 100644 --- a/bundle/internal/docs/docs.go +++ b/bundle/internal/docs/docs.go @@ -109,13 +109,9 @@ func buildMarkdown(nodes []rootNode, outputFile, header string) error { m = m.LF() if len(node.ObjectKeyAttributes) > 0 { - itemName := removePluralForm(node.Title) - fieldName := fmt.Sprintf("%s-name", itemName) - m = buildAttributeTable(m, []attributeNode{ - {Title: fieldName, Type: "Map", Description: fmt.Sprintf("The definition of a %s. See %s", itemName, md.Link("_", "#"+fieldName))}, - }) + n := removePluralForm(node.Title) + m = m.CodeBlocks("yaml", fmt.Sprintf("%ss:\n <%s-name>:\n <%s-field-name>: <%s-field-value>", n, n, n, n)) m = m.LF() - m = m.H3(fieldName) m = buildAttributeTable(m, node.ObjectKeyAttributes) } else if len(node.ArrayItemAttributes) > 0 { m = m.LF() From c355fbfcb2acd32e275ee93e0164fe355af19004 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Wed, 8 Jan 2025 16:11:35 +0100 Subject: [PATCH 13/26] fix: Description of root types with additional properties --- bundle/internal/docs/docs.go | 6 ++++++ bundle/internal/docs/main.go | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/bundle/internal/docs/docs.go b/bundle/internal/docs/docs.go index 35bb138871..24e8ba6eed 100644 --- a/bundle/internal/docs/docs.go +++ b/bundle/internal/docs/docs.go @@ -61,6 +61,12 @@ func getNodes(s jsonschema.Schema, refs map[string]jsonschema.Schema, customFiel additionalProps, ok := v.AdditionalProperties.(*jsonschema.Schema) if ok { objectKeyType := resolveRefs(additionalProps, refs) + if node.Description == "" { + node.Description = getDescription(objectKeyType, true) + } + if len(node.Example) == 0 { + node.Example = getExample(objectKeyType) + } node.ObjectKeyAttributes = getAttributes(objectKeyType.Properties, refs) rootProps = append(rootProps, extractNodes(k, objectKeyType.Properties, refs, customFields)...) 
} diff --git a/bundle/internal/docs/main.go b/bundle/internal/docs/main.go index beb4954c8b..f2f0cc55d6 100644 --- a/bundle/internal/docs/main.go +++ b/bundle/internal/docs/main.go @@ -88,11 +88,11 @@ func generateDocs(inputPaths []string, outputPath string, rootType reflect.Type, if isCustomField { customFields[jsonschema.TypePath(typ)] = true } - schemas[jsonschema.TypePath(typ)] = s refPath := getPath(typ) shouldHandle := strings.HasPrefix(refPath, "github.com") if !shouldHandle { + schemas[jsonschema.TypePath(typ)] = s return s } @@ -110,6 +110,7 @@ func generateDocs(inputPaths []string, outputPath string, rootType reflect.Type, assignAnnotation(v, a[k]) } + schemas[jsonschema.TypePath(typ)] = s return s }, }) From f9278c2b4dfbf6bd2704091b9a120bd0d787dbca Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Wed, 8 Jan 2025 16:11:58 +0100 Subject: [PATCH 14/26] docs: Add override for volume spec --- .../schema/annotations_openapi_overrides.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/bundle/internal/schema/annotations_openapi_overrides.yml b/bundle/internal/schema/annotations_openapi_overrides.yml index ef602d6efe..21e8217b25 100644 --- a/bundle/internal/schema/annotations_openapi_overrides.yml +++ b/bundle/internal/schema/annotations_openapi_overrides.yml @@ -69,6 +69,25 @@ github.com/databricks/cli/bundle/config/resources.Schema: "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Volume: + "_": + "markdown_description": |- + The volume resource type allows you to define and create Unity Catalog [volumes](https://docs.databricks.com/api/workspace/volumes/create) as part of a bundle. When deploying a bundle with a volume defined, note that: + + * A volume cannot be referenced in the `artifact_path` for the bundle until it exists in the workspace. Hence, if you want to use Databricks Asset Bundles to create the volume, you must first define the volume in the bundle, deploy it to create the volume, then reference it in the `artifact_path` in subsequent deployments. + + * Volumes in the bundle are not prepended with the `dev_${workspace.current_user.short_name}` prefix when the deployment target has `mode: development` configured. However, you can manually configure this prefix. 
See [_](/dev-tools/bundles/deployment-modes.md#custom-presets) + + "markdown_examples": |- + The following example creates a Unity Catalog volume with the key `my_volume``: + + ```yaml + resources: + volumes: + my_volume: + catalog_name: main + name: my_volume + schema_name: my_schema + ``` "grants": "description": |- PLACEHOLDER From 6c5268aaaaaa92dda0355de2a570929afeeaf6ea Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Fri, 10 Jan 2025 12:52:20 +0100 Subject: [PATCH 15/26] fix: Missing array types --- bundle/internal/docs/docs.go | 81 ++++++++++++++++++++++++++++++------ bundle/internal/docs/main.go | 2 +- 2 files changed, 70 insertions(+), 13 deletions(-) diff --git a/bundle/internal/docs/docs.go b/bundle/internal/docs/docs.go index 24e8ba6eed..07979ead0b 100644 --- a/bundle/internal/docs/docs.go +++ b/bundle/internal/docs/docs.go @@ -27,6 +27,7 @@ type attributeNode struct { Title string Type string Description string + Reference string } type rootProp struct { @@ -35,17 +36,25 @@ type rootProp struct { topLevel bool } +const MapType = "Map" + func getNodes(s jsonschema.Schema, refs map[string]jsonschema.Schema, customFields map[string]bool) []rootNode { rootProps := []rootProp{} for k, v := range s.Properties { rootProps = append(rootProps, rootProp{k, v, true}) } nodes := make([]rootNode, 0, len(rootProps)) + visited := make(map[string]bool) for i := 0; i < len(rootProps); i++ { item := rootProps[i] k := item.k v := item.v + + if visited[k] { + continue + } + visited[k] = true v = resolveRefs(v, refs) node := rootNode{ Title: k, @@ -55,25 +64,27 @@ func getNodes(s jsonschema.Schema, refs map[string]jsonschema.Schema, customFiel Type: getHumanReadableType(v.Type), } - node.Attributes = getAttributes(v.Properties, refs) + node.Attributes = getAttributes(v.Properties, refs, k) rootProps = append(rootProps, extractNodes(k, v.Properties, refs, customFields)...) additionalProps, ok := v.AdditionalProperties.(*jsonschema.Schema) if ok { objectKeyType := resolveRefs(additionalProps, refs) - if node.Description == "" { - node.Description = getDescription(objectKeyType, true) + d := getDescription(objectKeyType, true) + if d != "" { + node.Description = d } if len(node.Example) == 0 { node.Example = getExample(objectKeyType) } - node.ObjectKeyAttributes = getAttributes(objectKeyType.Properties, refs) + node.ObjectKeyAttributes = getAttributes(objectKeyType.Properties, refs, k) rootProps = append(rootProps, extractNodes(k, objectKeyType.Properties, refs, customFields)...) } if v.Items != nil { arrayItemType := resolveRefs(v.Items, refs) - node.ArrayItemAttributes = getAttributes(arrayItemType.Properties, refs) + node.ArrayItemAttributes = getAttributes(arrayItemType.Properties, refs, k) + // rootProps = append(rootProps, extractNodes(k, arrayItemType.Properties, refs, customFields)...) } isEmpty := len(node.Attributes) == 0 && len(node.ObjectKeyAttributes) == 0 && len(node.ArrayItemAttributes) == 0 @@ -167,8 +178,18 @@ func buildAttributeTable(m *md.Markdown, attributes []attributeNode) *md.Markdow // return m } -func formatDescription(s string) string { - return strings.ReplaceAll(s, "\n", " ") +func formatDescription(a attributeNode) string { + s := strings.ReplaceAll(a.Description, "\n", " ") + return s + if a.Reference != "" { + if strings.HasSuffix(s, ".") { + s += " " + } else if s != "" { + s += ". 
" + } + s += fmt.Sprintf("See %s.", md.Link("_", "#"+a.Reference)) + } + return s } // Build a custom table which we use in Databricks website @@ -186,7 +207,7 @@ func buildCustomAttributeTable(m *md.Markdown, attributes []attributeNode) *md.M for _, a := range attributes { m = m.PlainText(" * - " + fmt.Sprintf("`%s`", a.Title)) m = m.PlainText(" - " + a.Type) - m = m.PlainText(" - " + formatDescription(a.Description)) + m = m.PlainText(" - " + formatDescription(a)) m = m.LF() } return m @@ -203,7 +224,7 @@ func getHumanReadableType(t jsonschema.Type) string { return typesMapping[string(t)] } -func getAttributes(props map[string]*jsonschema.Schema, refs map[string]jsonschema.Schema) []attributeNode { +func getAttributes(props map[string]*jsonschema.Schema, refs map[string]jsonschema.Schema, prefix string) []attributeNode { attributes := []attributeNode{} for k, v := range props { v = resolveRefs(v, refs) @@ -211,10 +232,15 @@ func getAttributes(props map[string]*jsonschema.Schema, refs map[string]jsonsche if typeString == "" { typeString = "Any" } + var reference string + if isReferenceType(v, refs) { + reference = prefix + "." + k + } attributes = append(attributes, attributeNode{ Title: k, Type: typeString, Description: getDescription(v, true), + Reference: reference, }) } sort.Slice(attributes, func(i, j int) bool { @@ -223,6 +249,35 @@ func getAttributes(props map[string]*jsonschema.Schema, refs map[string]jsonsche return attributes } +func isReferenceType(v *jsonschema.Schema, refs map[string]jsonschema.Schema) bool { + if len(v.Properties) > 0 { + return true + } + if v.Items != nil { + items := resolveRefs(v.Items, refs) + if items != nil && items.Type == "object" { + return true + } + } + props := resolveAdditionaProperties(v, refs) + if props != nil && props.Type == "object" { + return true + } + + return false +} + +func resolveAdditionaProperties(v *jsonschema.Schema, refs map[string]jsonschema.Schema) *jsonschema.Schema { + if v.AdditionalProperties == nil { + return nil + } + additionalProps, ok := v.AdditionalProperties.(*jsonschema.Schema) + if !ok { + return nil + } + return resolveRefs(additionalProps, refs) +} + func getDescription(s *jsonschema.Schema, allowMarkdown bool) string { if allowMarkdown && s.MarkdownDescription != "" { return s.MarkdownDescription @@ -265,8 +320,10 @@ func resolveRefs(s *jsonschema.Schema, schemas map[string]jsonschema.Schema) *js } func shouldExtract(ref string, customFields map[string]bool) bool { - refKey := strings.TrimPrefix(ref, "#/$defs/") - _, isCustomField := customFields[refKey] + if i := strings.Index(ref, "github.com"); i >= 0 { + ref = ref[i:] + } + _, isCustomField := customFields[ref] return isCustomField } @@ -277,7 +334,7 @@ func extractNodes(prefix string, props map[string]*jsonschema.Schema, refs map[s continue } v = resolveRefs(v, refs) - if v.Type == "object" { + if v.Type == "object" || v.Type == "array" { nodes = append(nodes, rootProp{prefix + "." 
+ k, v, false}) } } diff --git a/bundle/internal/docs/main.go b/bundle/internal/docs/main.go index f2f0cc55d6..b3e95db68d 100644 --- a/bundle/internal/docs/main.go +++ b/bundle/internal/docs/main.go @@ -63,7 +63,7 @@ func main() { log.Fatal(err) } err = generateDocs( - []string{path.Join(annotationDir, "annotations_openapi.yml"), path.Join(annotationDir, "annotations_openapi_overrides.yml")}, + []string{path.Join(annotationDir, "annotations_openapi.yml"), path.Join(annotationDir, "annotations_openapi_overrides.yml"), path.Join(annotationDir, "annotations.yml")}, path.Join(outputDir, resourcesFileName), reflect.TypeOf(config.Resources{}), resourcesHeader, From bad77bd35876373f24c135431b1a6f258ace6055 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Fri, 10 Jan 2025 16:01:52 +0100 Subject: [PATCH 16/26] fix: Sync annotations --- bundle/internal/schema/annotations.yml | 99 +++++++++++++++----------- 1 file changed, 57 insertions(+), 42 deletions(-) diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index b66f989437..b72d544cf5 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -1,31 +1,25 @@ github.com/databricks/cli/bundle/config.Artifact: "build": "description": |- - An optional set of non-default build commands that you want to run locally before deployment. - - For Python wheel builds, the Databricks CLI assumes that it can find a local install of the Python wheel package to run builds, and it runs the command python setup.py bdist_wheel by default during each bundle deployment. - - To specify multiple build commands, separate each command with double-ampersand (&&) characters. + An optional set of non-default build commands to run locally before deployment. "executable": "description": |- - The executable type. + The executable type. Valid values are `bash`, `sh`, and `cmd`. "files": "description": |- The source files for the artifact. - "markdown_description": |- - The source files for the artifact, defined as an [_](#artifact_file). "path": "description": |- The location where the built artifact will be saved. "type": "description": |- - The type of the artifact. + Required. The type of the artifact. "markdown_description": |- - The type of the artifact. Valid values are `wheel` or `jar` + Required. The type of the artifact. Valid values are `whl`. github.com/databricks/cli/bundle/config.ArtifactFile: "source": "description": |- - The path of the files used to build the artifact. + Required. The path of the files used to build the artifact. github.com/databricks/cli/bundle/config.Bundle: "cluster_id": "description": |- @@ -71,13 +65,13 @@ github.com/databricks/cli/bundle/config.Experimental: The PyDABs configuration. "python_wheel_wrapper": "description": |- - Whether to use a Python wheel wrapper + Whether to use a Python wheel wrapper. "scripts": "description": |- - The commands to run + The commands to run. "use_legacy_run_as": "description": |- - Whether to use the legacy run_as behavior + Whether to use the legacy run_as behavior. github.com/databricks/cli/bundle/config.Git: "branch": "description": |- @@ -128,61 +122,66 @@ github.com/databricks/cli/bundle/config.PyDABs: github.com/databricks/cli/bundle/config.Resources: "clusters": "description": |- - The cluster definitions for the bundle. + The cluster definitions for the bundle, where each key is the name of a cluster. "markdown_description": |- - The cluster definitions for the bundle. 
See [_](/dev-tools/bundles/resources.md#cluster) + The cluster definitions for the bundle, where each key is the name of a cluster. See [_](/dev-tools/bundles/resources.md#cluster) "dashboards": "description": |- - The dashboard definitions for the bundle. + The dashboard definitions for the bundle, where each key is the name of the dashboard. "markdown_description": |- - The dashboard definitions for the bundle. See [_](/dev-tools/bundles/resources.md#dashboard) + The dashboard definitions for the bundle, where each key is the name of the dashboard. See [_](/dev-tools/bundles/resources.md#dashboard) "experiments": "description": |- - The experiment definitions for the bundle. + The experiment definitions for the bundle, where each key is the name of the experiment. "markdown_description": |- - The experiment definitions for the bundle. See [_](/dev-tools/bundles/resources.md#experiment) + The experiment definitions for the bundle, where each key is the name of the experiment. See [_](/dev-tools/bundles/resources.md#experiment) "jobs": "description": |- - The job definitions for the bundle. + The job definitions for the bundle, where each key is the name of the job. "markdown_description": |- - The job definitions for the bundle. See [_](/dev-tools/bundles/resources.md#job) + The job definitions for the bundle, where each key is the name of the job. See [_](/dev-tools/bundles/resources.md#job) "model_serving_endpoints": "description": |- - The model serving endpoint definitions for the bundle. + The model serving endpoint definitions for the bundle, where each key is the name of the model serving endpoint. "markdown_description": |- - The model serving endpoint definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model_serving_endpoint) + The model serving endpoint definitions for the bundle, where each key is the name of the model serving endpoint. See [_](/dev-tools/bundles/resources.md#model_serving_endpoint) "models": "description": |- - The model definitions for the bundle. + The model definitions for the bundle, where each key is the name of the model. "markdown_description": |- - The model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model) + The model definitions for the bundle, where each key is the name of the model. See [_](/dev-tools/bundles/resources.md#model-legacy) "pipelines": "description": |- - The pipeline definitions for the bundle. + The pipeline definitions for the bundle, where each key is the name of the pipeline. "markdown_description": |- - The pipeline definitions for the bundle. See [_](/dev-tools/bundles/resources.md#pipeline) + The pipeline definitions for the bundle, where each key is the name of the pipeline. See [_](/dev-tools/bundles/resources.md#pipeline) "quality_monitors": "description": |- - The quality monitor definitions for the bundle. + The quality monitor definitions for the bundle, where each key is the name of the quality monitor. "markdown_description": |- - The quality monitor definitions for the bundle. See [_](/dev-tools/bundles/resources.md#quality_monitor) + The quality monitor definitions for the bundle, where each key is the name of the quality monitor. See [_](/dev-tools/bundles/resources.md#quality-monitor) "registered_models": "description": |- - The registered model definitions for the bundle. + The registered model definitions for the bundle, where each key is the name of the registered model. "markdown_description": |- - The registered model definitions for the bundle. 
See [_](/dev-tools/bundles/resources.md#registered_model) + The registered model definitions for the bundle, where each key is the name of the registered model. See [_](/dev-tools/bundles/resources.md#registered-model) "schemas": "description": |- - The schema definitions for the bundle. + The schema definitions for the bundle, where each key is the name of the schema. "markdown_description": |- - The schema definitions for the bundle. See [_](/dev-tools/bundles/resources.md#schema) + The schema definitions for the bundle, where each key is the name of the schema. See [_](/dev-tools/bundles/resources.md#schema) "volumes": "description": |- - PLACEHOLDER -github.com/databricks/cli/bundle/config.Root: + The volume definitions for the bundle, where each key is the name of the volume. + "markdown_description": |- + The volume definitions for the bundle, where each key is the name of the volume. See [_](/dev-tools/bundles/resources.md#volume) "artifacts": "description": |- Defines the attributes to build an artifact + "markdown_description": |- + Defines the attributes to build artifacts, where each key is the name of the artifact, and the value is a Map that defines the artifact build settings. For information about the `artifacts` mapping, see [_](/dev-tools/bundles/settings.md#artifacts). + + Artifact settings defined in the top level of the bundle configuration can be overridden in the `targets` mapping. See [_](/dev-tools/bundles/artifact-overrides.md). "markdown_examples": |- ```yaml artifacts: @@ -195,7 +194,7 @@ github.com/databricks/cli/bundle/config.Root: "description": |- The attributes of the bundle. "markdown_description": |- - The attributes of the bundle. See [_](/dev-tools/bundles/settings.md#bundle) + The attributes of the bundle. See [_](/dev-tools/bundles/settings.md#bundle). "experimental": "description": |- Defines attributes for experimental features. @@ -206,9 +205,11 @@ github.com/databricks/cli/bundle/config.Root: Specifies a list of path globs that contain configuration files to include within the bundle. See [_](/dev-tools/bundles/settings.md#include) "permissions": "description": |- - Defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle + Defines a permission for a specific entity. "markdown_description": |- - Defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle. See [_](/dev-tools/bundles/settings.md#permissions) and [_](/dev-tools/bundles/permissions.md). + A Sequence that defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle, where each item in the sequence is a permission for a specific entity. + + See [_](/dev-tools/bundles/settings.md#permissions) and [_](/dev-tools/bundles/permissions.md). "markdown_examples": |- ```yaml permissions: @@ -226,12 +227,21 @@ github.com/databricks/cli/bundle/config.Root: Defines bundle deployment presets. See [_](/dev-tools/bundles/deployment-modes.md#presets). "resources": "description": |- - Specifies information about the Databricks resources used by the bundle + A Map that defines the resources for the bundle, where each key is the name of the resource, and the value is a Map that defines the resource. "markdown_description": |- - Specifies information about the Databricks resources used by the bundle. See [_](/dev-tools/bundles/resources.md). + A Map that defines the resources for the bundle, where each key is the name of the resource, and the value is a Map that defines the resource. 
For more information about supported resources, and resource definition reference, see [_](/dev-tools/bundles/resources.md). + + ```yaml + resources: + s: + : + : + ``` "run_as": "description": |- - The identity to use to run the bundle. + The identity to use when running workflows. + "markdown_description": |- + The identity to use when running workflows. See [_](/dev-tools/bundles/run-as.md). "sync": "description": |- The files and file paths to include or exclude in the bundle. @@ -376,6 +386,11 @@ github.com/databricks/cli/bundle/config/resources.Grant: "description": |- The privileges to grant to the specified entity github.com/databricks/cli/bundle/config/resources.Permission: + "-": + "description": |- + Defines a permission for a specific entity. + "markdown_description": |- + Defines a permission for a specific entity. See [_](/dev-tools/bundles/settings.md#permissions) and [_](/dev-tools/bundles/permissions.md). "group_name": "description": |- The name of the group that has the permission set in level. From d5d433e9d7fd62bbc08e72d227023c9412f761a1 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Fri, 10 Jan 2025 16:37:22 +0100 Subject: [PATCH 17/26] fix: More descriptions --- bundle/internal/schema/annotations.go | 4 +- bundle/internal/schema/annotations.yml | 32 ++++--- bundle/schema/jsonschema.json | 114 +++++++++++++------------ 3 files changed, 78 insertions(+), 72 deletions(-) diff --git a/bundle/internal/schema/annotations.go b/bundle/internal/schema/annotations.go index 3ca1b51be0..3313f7b1f0 100644 --- a/bundle/internal/schema/annotations.go +++ b/bundle/internal/schema/annotations.go @@ -102,7 +102,7 @@ func (d *annotationHandler) syncWithMissingAnnotations(outputPath string) error return err } - var outputTyped annotationFile + var outputTyped annotation.File err = convert.ToTyped(&outputTyped, output) if err != nil { return err @@ -132,7 +132,7 @@ func assignAnnotation(s *jsonschema.Schema, a annotation.Descriptor) { s.Enum = a.Enum } -func saveYamlWithStyle(outputPath string, annotations annotationFile) error { +func saveYamlWithStyle(outputPath string, annotations annotation.File) error { annotationOrder := yamlsaver.NewOrder([]string{"description", "markdown_description", "title", "default", "enum"}) style := map[string]yaml3.Style{} diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index 74fb6e0051..ff9c01749e 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -196,6 +196,7 @@ github.com/databricks/cli/bundle/config.Resources: The volume definitions for the bundle, where each key is the name of the volume. "markdown_description": |- The volume definitions for the bundle, where each key is the name of the volume. See [_](/dev-tools/bundles/resources.md#volume) +github.com/databricks/cli/bundle/config.Root: "artifacts": "description": |- Defines the attributes to build an artifact @@ -213,9 +214,9 @@ github.com/databricks/cli/bundle/config.Resources: ``` "bundle": "description": |- - The attributes of the bundle. + The bundle attributes when deploying to this target. "markdown_description": |- - The attributes of the bundle. See [_](/dev-tools/bundles/settings.md#bundle). + The bundle attributes when deploying to this target, "experimental": "description": |- Defines attributes for experimental features. @@ -267,16 +268,20 @@ github.com/databricks/cli/bundle/config.Resources: "description": |- The files and file paths to include or exclude in the bundle. 
"markdown_description": |- - The files and file paths to include or exclude in the bundle. See [_](/dev-tools/bundles/) + The files and file paths to include or exclude in the bundle. See [_](/dev-tools/bundles/settings.md#sync). "targets": "description": |- Defines deployment targets for the bundle. + "markdown_description": |- + Defines deployment targets for the bundle. See [_](/dev-tools/bundles/settings.md#targets) "variables": "description": |- A Map that defines the custom variables for the bundle, where each key is the name of the variable, and the value is a Map that defines the variable. "workspace": "description": |- Defines the Databricks workspace for the bundle. + "markdown_description": |- + Defines the Databricks workspace for the bundle. See [_](/dev-tools/bundles/settings.md#workspace). github.com/databricks/cli/bundle/config.Sync: "exclude": "description": |- @@ -295,7 +300,7 @@ github.com/databricks/cli/bundle/config.Target: The artifacts to include in the target deployment. See [_](#artifact) "bundle": "description": |- - The name of the bundle when deploying to this target. + The bundle attributes when deploying to this target. "cluster_id": "description": |- The ID of the cluster to use for this target. @@ -308,8 +313,6 @@ github.com/databricks/cli/bundle/config.Target: "git": "description": |- The Git version control settings for the target. - "markdown_description": |- - The Git version control settings for the target. See [_](#git). "mode": "description": |- The deployment mode for the target. @@ -318,23 +321,17 @@ github.com/databricks/cli/bundle/config.Target: "permissions": "description": |- The permissions for deploying and running the bundle in the target. - "markdown_description": |- - The permissions for deploying and running the bundle in the target. See [_](#permission). "presets": "description": |- The deployment presets for the target. - "markdown_description": |- - The deployment presets for the target. See [_](#preset). "resources": "description": |- The resource definitions for the target. - "markdown_description": |- - The resource definitions for the target. See [_](#resources). "run_as": "description": |- The identity to use to run the bundle. "markdown_description": |- - The identity to use to run the bundle. See [_](#job_run_as) and [_](/dev-tools/bundles/run_as.md). + The identity to use to run the bundle, see [_](/dev-tools/bundles/run-as.md). "sync": "description": |- The local paths to sync to the target workspace when a bundle is run or deployed. @@ -344,7 +341,7 @@ github.com/databricks/cli/bundle/config.Target: "description": |- The custom variable definitions for the target. "markdown_description": |- - The custom variable definitions for the target. See [_](/dev-tools/bundles/settings.md#variables) and [_](/dev-tools/bundles/variables.md). + The custom variable definitions for the target. See [_](/dev-tools/bundles/variables.md). "workspace": "description": |- The Databricks workspace for the target. @@ -492,3 +489,10 @@ github.com/databricks/cli/bundle/config/variable.Variable: "type": "description": |- The type of the variable. +github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs: + "service_principal_name": + "description": |- + The application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. + "user_name": + "description": |- + The email of an active workspace user. Non-admin users can only set this field to their own email. 
diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 2f78ffcca2..5f172ee557 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -845,7 +845,8 @@ "catalog_name", "name", "schema_name" - ] + ], + "markdownDescription": "The volume resource type allows you to define and create Unity Catalog [volumes](https://docs.databricks.com/api/workspace/volumes/create) as part of a bundle. When deploying a bundle with a volume defined, note that:\n\n* A volume cannot be referenced in the `artifact_path` for the bundle until it exists in the workspace. Hence, if you want to use Databricks Asset Bundles to create the volume, you must first define the volume in the bundle, deploy it to create the volume, then reference it in the `artifact_path`` in subsequent deployments.\n\n* Volumes in the bundle are not prepended with the `dev_${workspace.current_user.short_name}` prefix when the deployment target has `mode: development`` configured. However, you can manually configure this prefix. See [custom-presets](https://docs.databricks.com/dev-tools/bundles/deployment-modes.html#custom-presets)" }, { "type": "string", @@ -961,26 +962,25 @@ "type": "object", "properties": { "build": { - "description": "An optional set of non-default build commands that you want to run locally before deployment.\n\nFor Python wheel builds, the Databricks CLI assumes that it can find a local install of the Python wheel package to run builds, and it runs the command python setup.py bdist_wheel by default during each bundle deployment.\n\nTo specify multiple build commands, separate each command with double-ampersand (\u0026\u0026) characters.", + "description": "An optional set of non-default build commands to run locally before deployment.", "$ref": "#/$defs/string" }, "executable": { - "description": "The executable type.", + "description": "The executable type. Valid values are `bash`, `sh`, and `cmd`.", "$ref": "#/$defs/github.com/databricks/cli/libs/exec.ExecutableType" }, "files": { "description": "The source files for the artifact.", - "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config.ArtifactFile", - "markdownDescription": "The source files for the artifact, defined as an [artifact_file](https://docs.databricks.com/dev-tools/bundles/reference.html#artifact_file)." + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config.ArtifactFile" }, "path": { "description": "The location where the built artifact will be saved.", "$ref": "#/$defs/string" }, "type": { - "description": "The type of the artifact.", + "description": "Required. The type of the artifact.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.ArtifactType", - "markdownDescription": "The type of the artifact. Valid values are `wheel` or `jar`" + "markdownDescription": "Required. The type of the artifact. Valid values are `whl`." } }, "additionalProperties": false, @@ -1000,7 +1000,7 @@ "type": "object", "properties": { "source": { - "description": "The path of the files used to build the artifact.", + "description": "Required. 
The path of the files used to build the artifact.", "$ref": "#/$defs/string" } }, @@ -1105,15 +1105,15 @@ "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Python" }, "python_wheel_wrapper": { - "description": "Whether to use a Python wheel wrapper", + "description": "Whether to use a Python wheel wrapper.", "$ref": "#/$defs/bool" }, "scripts": { - "description": "The commands to run", + "description": "The commands to run.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Command" }, "use_legacy_run_as": { - "description": "Whether to use the legacy run_as behavior", + "description": "Whether to use the legacy run_as behavior.", "$ref": "#/$defs/bool" } }, @@ -1274,57 +1274,59 @@ "type": "object", "properties": { "clusters": { - "description": "The cluster definitions for the bundle.", + "description": "The cluster definitions for the bundle, where each key is the name of a cluster.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Cluster", - "markdownDescription": "The cluster definitions for the bundle. See [cluster](https://docs.databricks.com/dev-tools/bundles/resources.html#cluster)" + "markdownDescription": "The cluster definitions for the bundle, where each key is the name of a cluster. See [cluster](https://docs.databricks.com/dev-tools/bundles/resources.html#cluster)" }, "dashboards": { - "description": "The dashboard definitions for the bundle.", + "description": "The dashboard definitions for the bundle, where each key is the name of the dashboard.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Dashboard", - "markdownDescription": "The dashboard definitions for the bundle. See [dashboard](https://docs.databricks.com/dev-tools/bundles/resources.html#dashboard)" + "markdownDescription": "The dashboard definitions for the bundle, where each key is the name of the dashboard. See [dashboard](https://docs.databricks.com/dev-tools/bundles/resources.html#dashboard)" }, "experiments": { - "description": "The experiment definitions for the bundle.", + "description": "The experiment definitions for the bundle, where each key is the name of the experiment.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.MlflowExperiment", - "markdownDescription": "The experiment definitions for the bundle. See [experiment](https://docs.databricks.com/dev-tools/bundles/resources.html#experiment)" + "markdownDescription": "The experiment definitions for the bundle, where each key is the name of the experiment. See [experiment](https://docs.databricks.com/dev-tools/bundles/resources.html#experiment)" }, "jobs": { - "description": "The job definitions for the bundle.", + "description": "The job definitions for the bundle, where each key is the name of the job.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Job", - "markdownDescription": "The job definitions for the bundle. See [job](https://docs.databricks.com/dev-tools/bundles/resources.html#job)" + "markdownDescription": "The job definitions for the bundle, where each key is the name of the job. 
See [job](https://docs.databricks.com/dev-tools/bundles/resources.html#job)" }, "model_serving_endpoints": { - "description": "The model serving endpoint definitions for the bundle.", + "description": "The model serving endpoint definitions for the bundle, where each key is the name of the model serving endpoint.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint", - "markdownDescription": "The model serving endpoint definitions for the bundle. See [model_serving_endpoint](https://docs.databricks.com/dev-tools/bundles/resources.html#model_serving_endpoint)" + "markdownDescription": "The model serving endpoint definitions for the bundle, where each key is the name of the model serving endpoint. See [model_serving_endpoint](https://docs.databricks.com/dev-tools/bundles/resources.html#model_serving_endpoint)" }, "models": { - "description": "The model definitions for the bundle.", + "description": "The model definitions for the bundle, where each key is the name of the model.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.MlflowModel", - "markdownDescription": "The model definitions for the bundle. See [model](https://docs.databricks.com/dev-tools/bundles/resources.html#model)" + "markdownDescription": "The model definitions for the bundle, where each key is the name of the model. See [model-legacy](https://docs.databricks.com/dev-tools/bundles/resources.html#model-legacy)" }, "pipelines": { - "description": "The pipeline definitions for the bundle.", + "description": "The pipeline definitions for the bundle, where each key is the name of the pipeline.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Pipeline", - "markdownDescription": "The pipeline definitions for the bundle. See [pipeline](https://docs.databricks.com/dev-tools/bundles/resources.html#pipeline)" + "markdownDescription": "The pipeline definitions for the bundle, where each key is the name of the pipeline. See [pipeline](https://docs.databricks.com/dev-tools/bundles/resources.html#pipeline)" }, "quality_monitors": { - "description": "The quality monitor definitions for the bundle.", + "description": "The quality monitor definitions for the bundle, where each key is the name of the quality monitor.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.QualityMonitor", - "markdownDescription": "The quality monitor definitions for the bundle. See [quality_monitor](https://docs.databricks.com/dev-tools/bundles/resources.html#quality_monitor)" + "markdownDescription": "The quality monitor definitions for the bundle, where each key is the name of the quality monitor. See [quality-monitor](https://docs.databricks.com/dev-tools/bundles/resources.html#quality-monitor)" }, "registered_models": { - "description": "The registered model definitions for the bundle.", + "description": "The registered model definitions for the bundle, where each key is the name of the \u003cUC\u003e registered model.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.RegisteredModel", - "markdownDescription": "The registered model definitions for the bundle. See [registered_model](https://docs.databricks.com/dev-tools/bundles/resources.html#registered_model)" + "markdownDescription": "The registered model definitions for the bundle, where each key is the name of the \u003cUC\u003e registered model. 
See [registered-model](https://docs.databricks.com/dev-tools/bundles/resources.html#registered-model)" }, "schemas": { - "description": "The schema definitions for the bundle.", + "description": "The schema definitions for the bundle, where each key is the name of the schema.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Schema", - "markdownDescription": "The schema definitions for the bundle. See [schema](https://docs.databricks.com/dev-tools/bundles/resources.html#schema)" + "markdownDescription": "The schema definitions for the bundle, where each key is the name of the schema. See [schema](https://docs.databricks.com/dev-tools/bundles/resources.html#schema)" }, "volumes": { - "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Volume" + "description": "The volume definitions for the bundle, where each key is the name of the volume.", + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Volume", + "markdownDescription": "The volume definitions for the bundle, where each key is the name of the volume. See [volume](https://docs.databricks.com/dev-tools/bundles/resources.html#volume)" } }, "additionalProperties": false @@ -1372,7 +1374,7 @@ "markdownDescription": "The artifacts to include in the target deployment. See [artifact](https://docs.databricks.com/dev-tools/bundles/reference.html#artifact)" }, "bundle": { - "description": "The name of the bundle when deploying to this target.", + "description": "The bundle attributes when deploying to this target.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Bundle" }, "cluster_id": { @@ -1389,8 +1391,7 @@ }, "git": { "description": "The Git version control settings for the target.", - "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Git", - "markdownDescription": "The Git version control settings for the target. See [git](https://docs.databricks.com/dev-tools/bundles/reference.html#git)." + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Git" }, "mode": { "description": "The deployment mode for the target.", @@ -1399,23 +1400,20 @@ }, "permissions": { "description": "The permissions for deploying and running the bundle in the target.", - "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission", - "markdownDescription": "The permissions for deploying and running the bundle in the target. See [permission](https://docs.databricks.com/dev-tools/bundles/reference.html#permission)." + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission" }, "presets": { "description": "The deployment presets for the target.", - "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Presets", - "markdownDescription": "The deployment presets for the target. See [preset](https://docs.databricks.com/dev-tools/bundles/reference.html#preset)." + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Presets" }, "resources": { "description": "The resource definitions for the target.", - "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Resources", - "markdownDescription": "The resource definitions for the target. See [resources](https://docs.databricks.com/dev-tools/bundles/reference.html#resources)." + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Resources" }, "run_as": { "description": "The identity to use to run the bundle.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs", - "markdownDescription": "The identity to use to run the bundle. 
See [job_run_as](https://docs.databricks.com/dev-tools/bundles/reference.html#job_run_as) and [link](https://docs.databricks.com/dev-tools/bundles/run_as.html)." + "markdownDescription": "The identity to use to run the bundle, see [link](https://docs.databricks.com/dev-tools/bundles/run-as.html)." }, "sync": { "description": "The local paths to sync to the target workspace when a bundle is run or deployed.", @@ -1425,7 +1423,7 @@ "variables": { "description": "The custom variable definitions for the target.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/variable.TargetVariable", - "markdownDescription": "The custom variable definitions for the target. See [variables](https://docs.databricks.com/dev-tools/bundles/settings.html#variables) and [link](https://docs.databricks.com/dev-tools/bundles/variables.html)." + "markdownDescription": "The custom variable definitions for the target. See [link](https://docs.databricks.com/dev-tools/bundles/variables.html)." }, "workspace": { "description": "The Databricks workspace for the target.", @@ -3102,7 +3100,7 @@ "description": "Write-only setting. Specifies the user or service principal that the job runs as. If not specified, the job runs as the user who created the job.\n\nEither `user_name` or `service_principal_name` should be specified. If not, an error is thrown.", "properties": { "service_principal_name": { - "description": "Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role.", + "description": "The application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role.", "$ref": "#/$defs/string" }, "user_name": { @@ -6406,12 +6404,13 @@ "properties": { "artifacts": { "description": "Defines the attributes to build an artifact", - "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Artifact" + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Artifact", + "markdownDescription": "Defines the attributes to build artifacts, where each key is the name of the artifact, and the value is a Map that defines the artifact build settings. For information about the `artifacts` mapping, see [artifacts](https://docs.databricks.com/dev-tools/bundles/settings.html#artifacts).\n\nArtifact settings defined in the top level of the bundle configuration can be overridden in the `targets` mapping. See [link](https://docs.databricks.com/dev-tools/bundles/artifact-overrides.html)." }, "bundle": { - "description": "The attributes of the bundle.", + "description": "The bundle attributes when deploying to this target.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Bundle", - "markdownDescription": "The attributes of the bundle. See [bundle](https://docs.databricks.com/dev-tools/bundles/settings.html#bundle)" + "markdownDescription": "The bundle attributes when deploying to this target," }, "experimental": { "description": "Defines attributes for experimental features.", @@ -6423,9 +6422,9 @@ "markdownDescription": "Specifies a list of path globs that contain configuration files to include within the bundle. 
See [include](https://docs.databricks.com/dev-tools/bundles/settings.html#include)" }, "permissions": { - "description": "Defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle", + "description": "Defines a permission for a specific entity.", "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission", - "markdownDescription": "Defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle. See [permissions](https://docs.databricks.com/dev-tools/bundles/settings.html#permissions) and [link](https://docs.databricks.com/dev-tools/bundles/permissions.html)." + "markdownDescription": "A Sequence that defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle, where each item in the sequence is a permission for a specific entity.\n\nSee [permissions](https://docs.databricks.com/dev-tools/bundles/settings.html#permissions) and [link](https://docs.databricks.com/dev-tools/bundles/permissions.html)." }, "presets": { "description": "Defines bundle deployment presets.", @@ -6433,22 +6432,24 @@ "markdownDescription": "Defines bundle deployment presets. See [presets](https://docs.databricks.com/dev-tools/bundles/deployment-modes.html#presets)." }, "resources": { - "description": "Specifies information about the Databricks resources used by the bundle", + "description": "A Map that defines the resources for the bundle, where each key is the name of the resource, and the value is a Map that defines the resource.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Resources", - "markdownDescription": "Specifies information about the Databricks resources used by the bundle. See [link](https://docs.databricks.com/dev-tools/bundles/resources.html)." + "markdownDescription": "A Map that defines the resources for the bundle, where each key is the name of the resource, and the value is a Map that defines the resource. For more information about \u003cDABS\u003e supported resources, and resource definition reference, see [link](https://docs.databricks.com/dev-tools/bundles/resources.html).\n\n```yaml\nresources:\n \u003cresource-type\u003es:\n \u003cresource-name\u003e:\n \u003cresource-field-name\u003e: \u003cresource-field-value\u003e\n```" }, "run_as": { - "description": "The identity to use to run the bundle.", - "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs" + "description": "The identity to use when running \u003cDABS\u003e workflows.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs", + "markdownDescription": "The identity to use when running \u003cDABS\u003e workflows. See [link](https://docs.databricks.com/dev-tools/bundles/run-as.html)." }, "sync": { "description": "The files and file paths to include or exclude in the bundle.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Sync", - "markdownDescription": "The files and file paths to include or exclude in the bundle. See [link](https://docs.databricks.com/dev-tools/bundles/)" + "markdownDescription": "The files and file paths to include or exclude in the bundle. See [sync](https://docs.databricks.com/dev-tools/bundles/settings.html#sync)." }, "targets": { "description": "Defines deployment targets for the bundle.", - "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Target" + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Target", + "markdownDescription": "Defines deployment targets for the bundle. 
See [targets](https://docs.databricks.com/dev-tools/bundles/settings.html#targets)" }, "variables": { "description": "A Map that defines the custom variables for the bundle, where each key is the name of the variable, and the value is a Map that defines the variable.", @@ -6456,7 +6457,8 @@ }, "workspace": { "description": "Defines the Databricks workspace for the bundle.", - "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Workspace" + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Workspace", + "markdownDescription": "Defines the Databricks workspace for the bundle. See [workspace](https://docs.databricks.com/dev-tools/bundles/settings.html#workspace)." } }, "additionalProperties": false From 1fbec371e51e5c8e374e3df45c4a8b546c25def3 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Fri, 10 Jan 2025 17:47:06 +0100 Subject: [PATCH 18/26] fix: Link --- bundle/internal/schema/annotations.go | 12 +- .../schema/annotations_openapi_overrides.yml | 229 +++++++++++++++++- bundle/internal/schema/annotations_test.go | 52 ++-- bundle/schema/jsonschema.json | 35 ++- 4 files changed, 286 insertions(+), 42 deletions(-) diff --git a/bundle/internal/schema/annotations.go b/bundle/internal/schema/annotations.go index 3313f7b1f0..b857fdddaf 100644 --- a/bundle/internal/schema/annotations.go +++ b/bundle/internal/schema/annotations.go @@ -184,15 +184,17 @@ func convertLinksToAbsoluteUrl(s string) string { referencePage := "/dev-tools/bundles/reference.html" // Regular expression to match Markdown-style links like [_](link) - re := regexp.MustCompile(`\[_\]\(([^)]+)\)`) + re := regexp.MustCompile(`\[(.*)\]\(([^)]+)\)`) result := re.ReplaceAllStringFunc(s, func(match string) string { matches := re.FindStringSubmatch(match) if len(matches) < 2 { return match } - link := matches[1] - var text, absoluteURL string + originalText := matches[1] + link := matches[2] + + var text, absoluteURL string if strings.HasPrefix(link, "#") { text = strings.TrimPrefix(link, "#") absoluteURL = fmt.Sprintf("%s%s%s", base, referencePage, link) @@ -210,6 +212,10 @@ func convertLinksToAbsoluteUrl(s string) string { return match } + if originalText != "_" { + text = originalText + } + return fmt.Sprintf("[%s](%s)", text, absoluteURL) }) diff --git a/bundle/internal/schema/annotations_openapi_overrides.yml b/bundle/internal/schema/annotations_openapi_overrides.yml index 21e8217b25..36314b777b 100644 --- a/bundle/internal/schema/annotations_openapi_overrides.yml +++ b/bundle/internal/schema/annotations_openapi_overrides.yml @@ -1,4 +1,34 @@ github.com/databricks/cli/bundle/config/resources.Cluster: + "_": + "markdown_description": |- + The cluster resource defines an [all-purpose cluster](/api/workspace/clusters/create). 
+ + "markdown_examples": |- + The following example creates a cluster named `my_cluster` and sets that as the cluster to use to run the notebook in `my_job`: + + ```yaml + bundle: + name: clusters + + resources: + clusters: + my_cluster: + num_workers: 2 + node_type_id: "i3.xlarge" + autoscale: + min_workers: 2 + max_workers: 7 + spark_version: "13.3.x-scala2.12" + spark_conf: + "spark.executor.memory": "2g" + + jobs: + my_job: + tasks: + - task_key: test_task + notebook_task: + notebook_path: "./src/my_notebook.py" + ``` "data_security_mode": "description": |- PLACEHOLDER @@ -18,6 +48,24 @@ github.com/databricks/cli/bundle/config/resources.Cluster: "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Dashboard: + "_": + "markdown_description": |- + The dashboard resource allows you to manage [AI/BI dashboards](/api/workspace/lakeview/create) in a bundle. For information about AI/BI dashboards, see [_](/dashboards/index.md). + "markdown_examples": |- + The following example includes and deploys the sample __NYC Taxi Trip Analysis__ dashboard to the Databricks workspace. + + ``` yaml + resources: + dashboards: + nyc_taxi_trip_analysis: + display_name: "NYC Taxi Trip Analysis" + file_path: ../src/nyc_taxi_trip_analysis.lvdash.json + warehouse_id: ${var.warehouse_id} + ``` + If you use the UI to modify the dashboard, modifications made through the UI are not applied to the dashboard JSON file in the local bundle unless you explicitly update it using `bundle generate`. You can use the `--watch` option to continuously poll and retrieve changes to the dashboard. See [_](/dev-tools/cli/bundle-commands.md#generate). + + In addition, if you attempt to deploy a bundle that contains a dashboard JSON file that is different than the one in the remote workspace, an error will occur. To force the deploy and overwrite the dashboard in the remote workspace with the local one, use the `--force` option. See [_](/dev-tools/cli/bundle-commands.md#deploy). + "embed_credentials": "description": |- PLACEHOLDER @@ -28,6 +76,24 @@ github.com/databricks/cli/bundle/config/resources.Dashboard: "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Job: + "_": + "markdown_description": |- + The job resource allows you to define [jobs and their corresponding tasks](/api/workspace/jobs/create) in your bundle. For information about jobs, see [_](/jobs/index.md). For a tutorial that uses a template to create a job, see [_](/dev-tools/bundles/jobs-tutorial.md). + "markdown_examples": |- + The following example defines a job with the resource key `hello-job` with one notebook task: + + ```yaml + resources: + jobs: + hello-job: + name: hello-job + tasks: + - task_key: hello-task + notebook_task: + notebook_path: ./hello.py + ``` + + For information about defining job tasks and overriding job settings, see [_](/dev-tools/bundles/job-task-types.md), [_](/dev-tools/bundles/job-task-override.md), and [_](/dev-tools/bundles/cluster-override.md). "health": "description": |- PLACEHOLDER @@ -38,6 +104,22 @@ github.com/databricks/cli/bundle/config/resources.Job: "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.MlflowExperiment: + "_": + "markdown_description": |- + The experiment resource allows you to define [MLflow experiments](/api/workspace/experiments/createexperiment) in a bundle. For information about MLflow experiments, see [_](/mlflow/experiments.md). 
+ "markdown_examples": |- + The following example defines an experiment that all users can view: + + ```yaml + resources: + experiments: + experiment: + name: my_ml_experiment + permissions: + - level: CAN_READ + group_name: users + description: MLflow experiment used to track runs + ``` "permissions": "description": |- PLACEHOLDER @@ -46,22 +128,159 @@ github.com/databricks/cli/bundle/config/resources.MlflowModel: "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint: + "_": + "markdown_description": |- + The model_serving_endpoint resource allows you to define [model serving endpoints](/api/workspace/servingendpoints/create). See [_](/machine-learning/model-serving/manage-serving-endpoints.md). + "markdown_examples": |- + The following example defines a model serving endpoint: + + ```yaml + resources: + model_serving_endpoints: + uc_model_serving_endpoint: + name: "uc-model-endpoint" + config: + served_entities: + - entity_name: "myCatalog.mySchema.my-ads-model" + entity_version: "10" + workload_size: "Small" + scale_to_zero_enabled: "true" + traffic_config: + routes: + - served_model_name: "my-ads-model-10" + traffic_percentage: "100" + tags: + - key: "team" + value: "data science" + ``` "permissions": "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Pipeline: + "_": + "markdown_description": |- + The pipeline resource allows you to create [pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/delta-live-tables/index.md). For a tutorial that uses the template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). + "markdown_examples": |- + The following example defines a pipeline with the resource key `hello-pipeline`: + + ```yaml + resources: + pipelines: + hello-pipeline: + name: hello-pipeline + clusters: + - label: default + num_workers: 1 + development: true + continuous: false + channel: CURRENT + edition: CORE + photon: false + libraries: + - notebook: + path: ./pipeline.py + ``` "permissions": "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.QualityMonitor: + "_": + "markdown_description": |- + The quality_monitor resource allows you to define a [table monitor](/api/workspace/qualitymonitors/create). For information about monitors, see [_](/machine-learning/model-serving/monitor-diagnose-endpoints.md). + "markdown_examples": |- + The following example defines a quality monitor: + + ```yaml + resources: + quality_monitors: + my_quality_monitor: + table_name: dev.mlops_schema.predictions + output_schema_name: ${bundle.target}.mlops_schema + assets_dir: /Users/${workspace.current_user.userName}/databricks_lakehouse_monitoring + inference_log: + granularities: [1 day] + model_id_col: model_id + prediction_col: prediction + label_col: price + problem_type: PROBLEM_TYPE_REGRESSION + timestamp_col: timestamp + schedule: + quartz_cron_expression: 0 0 8 * * ? # Run Every day at 8am + timezone_id: UTC + ``` "table_name": "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.RegisteredModel: + "_": + "markdown_description": |- + The registered model resource allows you to define models in . For information about [registered models](/api/workspace/registeredmodels/create), see [_](/machine-learning/manage-model-lifecycle/index.md). 
+ "markdown_examples": |- + The following example defines a registered model in : + + ```yaml + resources: + registered_models: + model: + name: my_model + catalog_name: ${bundle.target} + schema_name: mlops_schema + comment: Registered model in Unity Catalog for ${bundle.target} deployment target + grants: + - privileges: + - EXECUTE + principal: account users + ``` "grants": "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Schema: + "_": + "markdown_description": |- + The schema resource type allows you to define [schemas](/api/workspace/schemas/create) for tables and other assets in your workflows and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations: + + - The owner of a schema resource is always the deployment user, and cannot be changed. If `run_as` is specified in the bundle, it will be ignored by operations on the schema. + - Only fields supported by the corresponding [Schemas object create API](/api/workspace/schemas/create) are available for the schema resource. For example, `enable_predictive_optimization` is not supported as it is only available on the [update API](/api/workspace/schemas/update). + "markdown_examples": |- + The following example defines a pipeline with the resource key `my_pipeline` that creates a schema with the key `my_schema` as the target: + + ```yaml + resources: + pipelines: + my_pipeline: + name: test-pipeline-{{.unique_id}} + libraries: + - notebook: + path: ./nb.sql + development: true + catalog: main + target: ${resources.schemas.my_schema.id} + + schemas: + my_schema: + name: test-schema-{{.unique_id}} + catalog_name: main + comment: This schema was created by DABs. + ``` + + A top-level grants mapping is not supported by , so if you want to set grants for a schema, define the grants for the schema within the `schemas` mapping. For more information about grants, see [_](/data-governance/unity-catalog/manage-privileges/index.md#grant). + + The following example defines a schema with grants: + + ```yaml + resources: + schemas: + my_schema: + name: test-schema + grants: + - principal: users + privileges: + - CAN_MANAGE + - principal: my_team + privileges: + - CAN_READ + catalog_name: main + ``` "grants": "description": |- PLACEHOLDER @@ -71,14 +290,14 @@ github.com/databricks/cli/bundle/config/resources.Schema: github.com/databricks/cli/bundle/config/resources.Volume: "_": "markdown_description": |- - The volume resource type allows you to define and create Unity Catalog [volumes](https://docs.databricks.com/api/workspace/volumes/create) as part of a bundle. When deploying a bundle with a volume defined, note that: + The volume resource type allows you to define and create [volumes](/api/workspace/volumes/create) as part of a bundle. When deploying a bundle with a volume defined, note that: - * A volume cannot be referenced in the `artifact_path` for the bundle until it exists in the workspace. Hence, if you want to use Databricks Asset Bundles to create the volume, you must first define the volume in the bundle, deploy it to create the volume, then reference it in the `artifact_path`` in subsequent deployments. + - A volume cannot be referenced in the `artifact_path` for the bundle until it exists in the workspace. Hence, if you want to use to create the volume, you must first define the volume in the bundle, deploy it to create the volume, then reference it in the `artifact_path` in subsequent deployments. 
- * Volumes in the bundle are not prepended with the `dev_${workspace.current_user.short_name}` prefix when the deployment target has `mode: development`` configured. However, you can manually configure this prefix. See [_](/dev-tools/bundles/deployment-modes.md#custom-presets) + - Volumes in the bundle are not prepended with the `dev_${workspace.current_user.short_name}` prefix when the deployment target has `mode: development` configured. However, you can manually configure this prefix. See [_](/dev-tools/bundles/deployment-modes.md#custom-presets). "markdown_examples": |- - The following example creates a Unity Catalog volume with the key `my_volume``: + The following example creates a volume with the key `my_volume`: ```yaml resources: @@ -88,6 +307,8 @@ github.com/databricks/cli/bundle/config/resources.Volume: name: my_volume schema_name: my_schema ``` + + For an example bundle that runs a job that writes to a file in volume, see the [bundle-examples GitHub repository](https://github.com/databricks/bundle-examples/tree/main/knowledge_base/write_from_job_to_volume). "grants": "description": |- PLACEHOLDER diff --git a/bundle/internal/schema/annotations_test.go b/bundle/internal/schema/annotations_test.go index d7e2fea7cf..782d2d6349 100644 --- a/bundle/internal/schema/annotations_test.go +++ b/bundle/internal/schema/annotations_test.go @@ -9,29 +9,37 @@ func TestConvertLinksToAbsoluteUrl(t *testing.T) { input string expected string }{ + // { + // input: "", + // expected: "", + // }, + // { + // input: "Some text (not a link)", + // expected: "Some text (not a link)", + // }, + // { + // input: "This is a link to [_](#section)", + // expected: "This is a link to [section](https://docs.databricks.com/dev-tools/bundles/reference.html#section)", + // }, + // { + // input: "This is a link to [_](/dev-tools/bundles/resources.html#dashboard)", + // expected: "This is a link to [dashboard](https://docs.databricks.com/dev-tools/bundles/resources.html#dashboard)", + // }, + // { + // input: "This is a link to [_](/dev-tools/bundles/resources.html)", + // expected: "This is a link to [link](https://docs.databricks.com/dev-tools/bundles/resources.html)", + // }, + // { + // input: "This is a link to [external](https://external.com)", + // expected: "This is a link to [external](https://external.com)", + // }, + // { + // input: "This is a link to [pipelines](/api/workspace/pipelines/create)", + // expected: "This is a link to [pipelines](https://docs.databricks.com/api/workspace/pipelines/create)", + // }, { - input: "", - expected: "", - }, - { - input: "Some text (not a link)", - expected: "Some text (not a link)", - }, - { - input: "This is a link to [_](#section)", - expected: "This is a link to [section](https://docs.databricks.com/dev-tools/bundles/reference.html#section)", - }, - { - input: "This is a link to [_](/dev-tools/bundles/resources.html#dashboard)", - expected: "This is a link to [dashboard](https://docs.databricks.com/dev-tools/bundles/resources.html#dashboard)", - }, - { - input: "This is a link to [_](/dev-tools/bundles/resources.html)", - expected: "This is a link to [link](https://docs.databricks.com/dev-tools/bundles/resources.html)", - }, - { - input: "This is a link to [external](https://external.com)", - expected: "This is a link to [external](https://external.com)", + input: "The registered model resource allows you to define models in \u003cUC\u003e. 
For information about \u003cUC\u003e [registered models](/api/workspace/registeredmodels/create), [registered models 2](/api/workspace/registeredmodels/create)", + expected: "The registered model resource allows you to define models in \u003cUC\u003e. For information about \u003cUC\u003e [registered models](/api/workspace/registeredmodels/create), [registered models 2](/api/workspace/registeredmodels/create)", }, } diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 5f172ee557..4cc8d0d46b 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -183,7 +183,8 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.WorkloadType" } }, - "additionalProperties": false + "additionalProperties": false, + "markdownDescription": "The cluster resource defines an [all-purpose cluster](https://docs.databricks.com/api/workspace/clusters/create)." }, { "type": "string", @@ -246,7 +247,8 @@ "$ref": "#/$defs/string" } }, - "additionalProperties": false + "additionalProperties": false, + "markdownDescription": "The dashboard resource allows you to manage [AI/BI dashboards](/api/workspace/lakeview/create) in a bundle. For information about AI/BI dashboards, see [_](https://docs.databricks.com/dashboards/index.html)." }, { "type": "string", @@ -367,7 +369,8 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.WebhookNotifications" } }, - "additionalProperties": false + "additionalProperties": false, + "markdownDescription": "The job resource allows you to define [jobs and their corresponding tasks](/api/workspace/jobs/create) in your bundle. For information about jobs, see [_](/jobs/index.md). For a tutorial that uses a \u003cDABS\u003e template to create a job, see [_](https://docs.databricks.com/dev-tools/bundles/jobs-tutorial.html)." }, { "type": "string", @@ -412,7 +415,8 @@ "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/ml.ExperimentTag" } }, - "additionalProperties": false + "additionalProperties": false, + "markdownDescription": "The experiment resource allows you to define [MLflow experiments](/api/workspace/experiments/createexperiment) in a bundle. For information about MLflow experiments, see [_](https://docs.databricks.com/mlflow/experiments.html)." }, { "type": "string", @@ -502,7 +506,8 @@ "required": [ "config", "name" - ] + ], + "markdownDescription": "The model_serving_endpoint resource allows you to define [model serving endpoints](/api/workspace/servingendpoints/create). See [_](https://docs.databricks.com/machine-learning/model-serving/manage-serving-endpoints.html)." }, { "type": "string", @@ -644,7 +649,8 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineTrigger" } }, - "additionalProperties": false + "additionalProperties": false, + "markdownDescription": "The pipeline resource allows you to create \u003cDLT\u003e [pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/delta-live-tables/index.md). For a tutorial that uses the \u003cDABS\u003e template to create a pipeline, see [_](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." }, { "type": "string", @@ -718,7 +724,8 @@ "table_name", "assets_dir", "output_schema_name" - ] + ], + "markdownDescription": "The quality_monitor resource allows you to define a \u003cUC\u003e [table monitor](/api/workspace/qualitymonitors/create). 
For information about monitors, see [_](https://docs.databricks.com/machine-learning/model-serving/monitor-diagnose-endpoints.html)." }, { "type": "string", @@ -760,7 +767,8 @@ "catalog_name", "name", "schema_name" - ] + ], + "markdownDescription": "The registered model resource allows you to define models in \u003cUC\u003e. For information about \u003cUC\u003e [registered models](/api/workspace/registeredmodels/create), see [_](https://docs.databricks.com/machine-learning/manage-model-lifecycle/index.html)." }, { "type": "string", @@ -800,7 +808,8 @@ "required": [ "catalog_name", "name" - ] + ], + "markdownDescription": "The schema resource type allows you to define \u003cUC\u003e [schemas](https://docs.databricks.com/api/workspace/schemas/create) for tables and other assets in your workflows and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations:\n\n- The owner of a schema resource is always the deployment user, and cannot be changed. If `run_as` is specified in the bundle, it will be ignored by operations on the schema.\n- Only fields supported by the corresponding [Schemas object create API](/api/workspace/schemas/create) are available for the schema resource. For example, `enable_predictive_optimization` is not supported as it is only available on the [update API](https://docs.databricks.com/api/workspace/schemas/update)." }, { "type": "string", @@ -846,7 +855,7 @@ "name", "schema_name" ], - "markdownDescription": "The volume resource type allows you to define and create Unity Catalog [volumes](https://docs.databricks.com/api/workspace/volumes/create) as part of a bundle. When deploying a bundle with a volume defined, note that:\n\n* A volume cannot be referenced in the `artifact_path` for the bundle until it exists in the workspace. Hence, if you want to use Databricks Asset Bundles to create the volume, you must first define the volume in the bundle, deploy it to create the volume, then reference it in the `artifact_path`` in subsequent deployments.\n\n* Volumes in the bundle are not prepended with the `dev_${workspace.current_user.short_name}` prefix when the deployment target has `mode: development`` configured. However, you can manually configure this prefix. See [custom-presets](https://docs.databricks.com/dev-tools/bundles/deployment-modes.html#custom-presets)" + "markdownDescription": "The volume resource type allows you to define and create \u003cUC\u003e [volumes](https://docs.databricks.com/api/workspace/volumes/create) as part of a bundle. When deploying a bundle with a volume defined, note that:\n\n- A volume cannot be referenced in the `artifact_path` for the bundle until it exists in the workspace. Hence, if you want to use \u003cDABS\u003e to create the volume, you must first define the volume in the bundle, deploy it to create the volume, then reference it in the `artifact_path` in subsequent deployments.\n\n- Volumes in the bundle are not prepended with the `dev_${workspace.current_user.short_name}` prefix when the deployment target has `mode: development` configured. However, you can manually configure this prefix. See [custom-presets](https://docs.databricks.com/dev-tools/bundles/deployment-modes.html#custom-presets)." }, { "type": "string", @@ -1039,12 +1048,12 @@ "deployment": { "description": "The definition of the bundle deployment", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Deployment", - "markdownDescription": "The definition of the bundle deployment. 
For supported attributes, see [deployment](https://docs.databricks.com/dev-tools/bundles/reference.html#deployment) and [link](https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)." + "markdownDescription": "The definition of the bundle deployment. For supported attributes, see [_](#deployment) and [_](https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)." }, "git": { "description": "The Git version control details that are associated with your bundle.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Git", - "markdownDescription": "The Git version control details that are associated with your bundle. For supported attributes, see [git](https://docs.databricks.com/dev-tools/bundles/reference.html#git) and [git](https://docs.databricks.com/dev-tools/bundles/settings.html#git)." + "markdownDescription": "The Git version control details that are associated with your bundle. For supported attributes, see [_](#git) and [_](https://docs.databricks.com/dev-tools/bundles/settings.html#git)." }, "name": { "description": "The name of the bundle.", @@ -6424,7 +6433,7 @@ "permissions": { "description": "Defines a permission for a specific entity.", "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission", - "markdownDescription": "A Sequence that defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle, where each item in the sequence is a permission for a specific entity.\n\nSee [permissions](https://docs.databricks.com/dev-tools/bundles/settings.html#permissions) and [link](https://docs.databricks.com/dev-tools/bundles/permissions.html)." + "markdownDescription": "A Sequence that defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle, where each item in the sequence is a permission for a specific entity.\n\nSee [_](/dev-tools/bundles/settings.md#permissions) and [_](https://docs.databricks.com/dev-tools/bundles/permissions.html)." 
}, "presets": { "description": "Defines bundle deployment presets.", From 151a6f86dde4e469e12f065e9dc4c29d2e885769 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Fri, 10 Jan 2025 17:51:22 +0100 Subject: [PATCH 19/26] fix: Multiple links --- bundle/internal/schema/annotations.go | 2 +- bundle/internal/schema/annotations_test.go | 56 ++++++++++------------ bundle/schema/jsonschema.json | 22 ++++----- 3 files changed, 38 insertions(+), 42 deletions(-) diff --git a/bundle/internal/schema/annotations.go b/bundle/internal/schema/annotations.go index b857fdddaf..54631835ec 100644 --- a/bundle/internal/schema/annotations.go +++ b/bundle/internal/schema/annotations.go @@ -184,7 +184,7 @@ func convertLinksToAbsoluteUrl(s string) string { referencePage := "/dev-tools/bundles/reference.html" // Regular expression to match Markdown-style links like [_](link) - re := regexp.MustCompile(`\[(.*)\]\(([^)]+)\)`) + re := regexp.MustCompile(`\[(.*?)\]\((.*?)\)`) result := re.ReplaceAllStringFunc(s, func(match string) string { matches := re.FindStringSubmatch(match) if len(matches) < 2 { diff --git a/bundle/internal/schema/annotations_test.go b/bundle/internal/schema/annotations_test.go index 782d2d6349..0e15933596 100644 --- a/bundle/internal/schema/annotations_test.go +++ b/bundle/internal/schema/annotations_test.go @@ -9,37 +9,33 @@ func TestConvertLinksToAbsoluteUrl(t *testing.T) { input string expected string }{ - // { - // input: "", - // expected: "", - // }, - // { - // input: "Some text (not a link)", - // expected: "Some text (not a link)", - // }, - // { - // input: "This is a link to [_](#section)", - // expected: "This is a link to [section](https://docs.databricks.com/dev-tools/bundles/reference.html#section)", - // }, - // { - // input: "This is a link to [_](/dev-tools/bundles/resources.html#dashboard)", - // expected: "This is a link to [dashboard](https://docs.databricks.com/dev-tools/bundles/resources.html#dashboard)", - // }, - // { - // input: "This is a link to [_](/dev-tools/bundles/resources.html)", - // expected: "This is a link to [link](https://docs.databricks.com/dev-tools/bundles/resources.html)", - // }, - // { - // input: "This is a link to [external](https://external.com)", - // expected: "This is a link to [external](https://external.com)", - // }, - // { - // input: "This is a link to [pipelines](/api/workspace/pipelines/create)", - // expected: "This is a link to [pipelines](https://docs.databricks.com/api/workspace/pipelines/create)", - // }, { - input: "The registered model resource allows you to define models in \u003cUC\u003e. For information about \u003cUC\u003e [registered models](/api/workspace/registeredmodels/create), [registered models 2](/api/workspace/registeredmodels/create)", - expected: "The registered model resource allows you to define models in \u003cUC\u003e. 
For information about \u003cUC\u003e [registered models](/api/workspace/registeredmodels/create), [registered models 2](/api/workspace/registeredmodels/create)", + input: "", + expected: "", + }, + { + input: "Some text (not a link)", + expected: "Some text (not a link)", + }, + { + input: "This is a link to [_](#section)", + expected: "This is a link to [section](https://docs.databricks.com/dev-tools/bundles/reference.html#section)", + }, + { + input: "This is a link to [_](/dev-tools/bundles/resources.html#dashboard)", + expected: "This is a link to [dashboard](https://docs.databricks.com/dev-tools/bundles/resources.html#dashboard)", + }, + { + input: "This is a link to [_](/dev-tools/bundles/resources.html)", + expected: "This is a link to [link](https://docs.databricks.com/dev-tools/bundles/resources.html)", + }, + { + input: "This is a link to [external](https://external.com)", + expected: "This is a link to [external](https://external.com)", + }, + { + input: "This is a link to [one](/relative), [two](/relative-2)", + expected: "This is a link to [one](https://docs.databricks.com/relative), [two](https://docs.databricks.com/relative-2)", }, } diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 4cc8d0d46b..990a2f133f 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -248,7 +248,7 @@ } }, "additionalProperties": false, - "markdownDescription": "The dashboard resource allows you to manage [AI/BI dashboards](/api/workspace/lakeview/create) in a bundle. For information about AI/BI dashboards, see [_](https://docs.databricks.com/dashboards/index.html)." + "markdownDescription": "The dashboard resource allows you to manage [AI/BI dashboards](https://docs.databricks.com/api/workspace/lakeview/create) in a bundle. For information about AI/BI dashboards, see [link](https://docs.databricks.com/dashboards/index.html)." }, { "type": "string", @@ -370,7 +370,7 @@ } }, "additionalProperties": false, - "markdownDescription": "The job resource allows you to define [jobs and their corresponding tasks](/api/workspace/jobs/create) in your bundle. For information about jobs, see [_](/jobs/index.md). For a tutorial that uses a \u003cDABS\u003e template to create a job, see [_](https://docs.databricks.com/dev-tools/bundles/jobs-tutorial.html)." + "markdownDescription": "The job resource allows you to define [jobs and their corresponding tasks](https://docs.databricks.com/api/workspace/jobs/create) in your bundle. For information about jobs, see [link](https://docs.databricks.com/jobs/index.html). For a tutorial that uses a \u003cDABS\u003e template to create a job, see [link](https://docs.databricks.com/dev-tools/bundles/jobs-tutorial.html)." }, { "type": "string", @@ -416,7 +416,7 @@ } }, "additionalProperties": false, - "markdownDescription": "The experiment resource allows you to define [MLflow experiments](/api/workspace/experiments/createexperiment) in a bundle. For information about MLflow experiments, see [_](https://docs.databricks.com/mlflow/experiments.html)." + "markdownDescription": "The experiment resource allows you to define [MLflow experiments](https://docs.databricks.com/api/workspace/experiments/createexperiment) in a bundle. For information about MLflow experiments, see [link](https://docs.databricks.com/mlflow/experiments.html)." 
}, { "type": "string", @@ -507,7 +507,7 @@ "config", "name" ], - "markdownDescription": "The model_serving_endpoint resource allows you to define [model serving endpoints](/api/workspace/servingendpoints/create). See [_](https://docs.databricks.com/machine-learning/model-serving/manage-serving-endpoints.html)." + "markdownDescription": "The model_serving_endpoint resource allows you to define [model serving endpoints](https://docs.databricks.com/api/workspace/servingendpoints/create). See [link](https://docs.databricks.com/machine-learning/model-serving/manage-serving-endpoints.html)." }, { "type": "string", @@ -650,7 +650,7 @@ } }, "additionalProperties": false, - "markdownDescription": "The pipeline resource allows you to create \u003cDLT\u003e [pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/delta-live-tables/index.md). For a tutorial that uses the \u003cDABS\u003e template to create a pipeline, see [_](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." + "markdownDescription": "The pipeline resource allows you to create \u003cDLT\u003e [pipelines](https://docs.databricks.com/api/workspace/pipelines/create). For information about pipelines, see [link](https://docs.databricks.com/delta-live-tables/index.html). For a tutorial that uses the \u003cDABS\u003e template to create a pipeline, see [link](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." }, { "type": "string", @@ -725,7 +725,7 @@ "assets_dir", "output_schema_name" ], - "markdownDescription": "The quality_monitor resource allows you to define a \u003cUC\u003e [table monitor](/api/workspace/qualitymonitors/create). For information about monitors, see [_](https://docs.databricks.com/machine-learning/model-serving/monitor-diagnose-endpoints.html)." + "markdownDescription": "The quality_monitor resource allows you to define a \u003cUC\u003e [table monitor](https://docs.databricks.com/api/workspace/qualitymonitors/create). For information about monitors, see [link](https://docs.databricks.com/machine-learning/model-serving/monitor-diagnose-endpoints.html)." }, { "type": "string", @@ -768,7 +768,7 @@ "name", "schema_name" ], - "markdownDescription": "The registered model resource allows you to define models in \u003cUC\u003e. For information about \u003cUC\u003e [registered models](/api/workspace/registeredmodels/create), see [_](https://docs.databricks.com/machine-learning/manage-model-lifecycle/index.html)." + "markdownDescription": "The registered model resource allows you to define models in \u003cUC\u003e. For information about \u003cUC\u003e [registered models](https://docs.databricks.com/api/workspace/registeredmodels/create), see [link](https://docs.databricks.com/machine-learning/manage-model-lifecycle/index.html)." }, { "type": "string", @@ -809,7 +809,7 @@ "catalog_name", "name" ], - "markdownDescription": "The schema resource type allows you to define \u003cUC\u003e [schemas](https://docs.databricks.com/api/workspace/schemas/create) for tables and other assets in your workflows and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations:\n\n- The owner of a schema resource is always the deployment user, and cannot be changed. If `run_as` is specified in the bundle, it will be ignored by operations on the schema.\n- Only fields supported by the corresponding [Schemas object create API](/api/workspace/schemas/create) are available for the schema resource. 
For example, `enable_predictive_optimization` is not supported as it is only available on the [update API](https://docs.databricks.com/api/workspace/schemas/update)." + "markdownDescription": "The schema resource type allows you to define \u003cUC\u003e [schemas](https://docs.databricks.com/api/workspace/schemas/create) for tables and other assets in your workflows and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations:\n\n- The owner of a schema resource is always the deployment user, and cannot be changed. If `run_as` is specified in the bundle, it will be ignored by operations on the schema.\n- Only fields supported by the corresponding [Schemas object create API](https://docs.databricks.com/api/workspace/schemas/create) are available for the schema resource. For example, `enable_predictive_optimization` is not supported as it is only available on the [update API](https://docs.databricks.com/api/workspace/schemas/update)." }, { "type": "string", @@ -1048,12 +1048,12 @@ "deployment": { "description": "The definition of the bundle deployment", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Deployment", - "markdownDescription": "The definition of the bundle deployment. For supported attributes, see [_](#deployment) and [_](https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)." + "markdownDescription": "The definition of the bundle deployment. For supported attributes, see [deployment](https://docs.databricks.com/dev-tools/bundles/reference.html#deployment) and [link](https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)." }, "git": { "description": "The Git version control details that are associated with your bundle.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Git", - "markdownDescription": "The Git version control details that are associated with your bundle. For supported attributes, see [_](#git) and [_](https://docs.databricks.com/dev-tools/bundles/settings.html#git)." + "markdownDescription": "The Git version control details that are associated with your bundle. For supported attributes, see [git](https://docs.databricks.com/dev-tools/bundles/reference.html#git) and [git](https://docs.databricks.com/dev-tools/bundles/settings.html#git)." }, "name": { "description": "The name of the bundle.", @@ -6433,7 +6433,7 @@ "permissions": { "description": "Defines a permission for a specific entity.", "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission", - "markdownDescription": "A Sequence that defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle, where each item in the sequence is a permission for a specific entity.\n\nSee [_](/dev-tools/bundles/settings.md#permissions) and [_](https://docs.databricks.com/dev-tools/bundles/permissions.html)." + "markdownDescription": "A Sequence that defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle, where each item in the sequence is a permission for a specific entity.\n\nSee [permissions](https://docs.databricks.com/dev-tools/bundles/settings.html#permissions) and [link](https://docs.databricks.com/dev-tools/bundles/permissions.html)." 
}, "presets": { "description": "Defines bundle deployment presets.", From 4b01f6b1bcfa94ad775f3b0a8ea7476aeb28f024 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Fri, 10 Jan 2025 18:09:18 +0100 Subject: [PATCH 20/26] fix: Add links --- bundle/internal/docs/docs.go | 1 - bundle/internal/schema/annotations.yml | 13 +++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/bundle/internal/docs/docs.go b/bundle/internal/docs/docs.go index 07979ead0b..de32c118f0 100644 --- a/bundle/internal/docs/docs.go +++ b/bundle/internal/docs/docs.go @@ -180,7 +180,6 @@ func buildAttributeTable(m *md.Markdown, attributes []attributeNode) *md.Markdow func formatDescription(a attributeNode) string { s := strings.ReplaceAll(a.Description, "\n", " ") - return s if a.Reference != "" { if strings.HasSuffix(s, ".") { s += " " diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index ff9c01749e..f7e1aca962 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -57,8 +57,6 @@ github.com/databricks/cli/bundle/config.Deployment: "lock": "description": |- The deployment lock attributes. - "markdown_description": |- - The deployment lock attributes. See [_](#lock). github.com/databricks/cli/bundle/config.Experimental: "pydabs": "description": |- @@ -296,8 +294,6 @@ github.com/databricks/cli/bundle/config.Target: "artifacts": "description": |- The artifacts to include in the target deployment. - "markdown_description": |- - The artifacts to include in the target deployment. See [_](#artifact) "bundle": "description": |- The bundle attributes when deploying to this target. @@ -335,13 +331,9 @@ github.com/databricks/cli/bundle/config.Target: "sync": "description": |- The local paths to sync to the target workspace when a bundle is run or deployed. - "markdown_description": |- - The local paths to sync to the target workspace when a bundle is run or deployed. See [_](#sync). "variables": "description": |- The custom variable definitions for the target. - "markdown_description": |- - The custom variable definitions for the target. See [_](/dev-tools/bundles/variables.md). "workspace": "description": |- The Databricks workspace for the target. @@ -475,6 +467,11 @@ github.com/databricks/cli/bundle/config/variable.TargetVariable: "description": |- The type of the variable. github.com/databricks/cli/bundle/config/variable.Variable: + "_": + "description": |- + Defines a custom variable for the bundle. + "markdown_description": |- + Defines a custom variable for the bundle. See [_](/dev-tools/bundles/settings.md#variables). 
"default": "description": |- PLACEHOLDER From ee5db187c064b9e2fbb3e776c1ab7bc6085410d9 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Fri, 10 Jan 2025 18:33:58 +0100 Subject: [PATCH 21/26] fix: Move logic to separate fiels --- bundle/internal/docs/markdown.go | 115 ++++++++++++++ bundle/internal/docs/{docs.go => nodes.go} | 174 --------------------- bundle/internal/docs/refs.go | 71 +++++++++ 3 files changed, 186 insertions(+), 174 deletions(-) create mode 100644 bundle/internal/docs/markdown.go rename bundle/internal/docs/{docs.go => nodes.go} (50%) create mode 100644 bundle/internal/docs/refs.go diff --git a/bundle/internal/docs/markdown.go b/bundle/internal/docs/markdown.go new file mode 100644 index 0000000000..3bb38ab436 --- /dev/null +++ b/bundle/internal/docs/markdown.go @@ -0,0 +1,115 @@ +package main + +import ( + "fmt" + "log" + "os" + "strings" + + md "github.com/nao1215/markdown" +) + +func buildMarkdown(nodes []rootNode, outputFile, header string) error { + f, err := os.Create(outputFile) + if err != nil { + log.Fatal(err) + } + defer f.Close() + + m := md.NewMarkdown(f) + m = m.PlainText(header) + for _, node := range nodes { + m = m.LF() + if node.TopLevel { + m = m.H2(node.Title) + } else { + m = m.H3(node.Title) + } + m = m.LF() + + if node.Type != "" { + m = m.PlainText(fmt.Sprintf("**`Type: %s`**", node.Type)) + m = m.LF() + } + m = m.PlainText(node.Description) + m = m.LF() + + if len(node.ObjectKeyAttributes) > 0 { + n := removePluralForm(node.Title) + m = m.CodeBlocks("yaml", fmt.Sprintf("%ss:\n <%s-name>:\n <%s-field-name>: <%s-field-value>", n, n, n, n)) + m = m.LF() + m = buildAttributeTable(m, node.ObjectKeyAttributes) + } else if len(node.ArrayItemAttributes) > 0 { + m = m.LF() + m = buildAttributeTable(m, node.ArrayItemAttributes) + } else if len(node.Attributes) > 0 { + m = m.LF() + m = buildAttributeTable(m, node.Attributes) + } + + if node.Example != "" { + m = m.LF() + m = m.PlainText("**Example**") + m = m.LF() + m = m.PlainText(node.Example) + } + } + + err = m.Build() + if err != nil { + log.Fatal(err) + } + + return nil +} + +// Build a custom table which we use in Databricks website +func buildCustomAttributeTable(m *md.Markdown, attributes []attributeNode) *md.Markdown { + m = m.LF() + m = m.PlainText(".. list-table::") + m = m.PlainText(" :header-rows: 1") + m = m.LF() + + m = m.PlainText(" * - Key") + m = m.PlainText(" - Type") + m = m.PlainText(" - Description") + m = m.LF() + + for _, a := range attributes { + m = m.PlainText(" * - " + fmt.Sprintf("`%s`", a.Title)) + m = m.PlainText(" - " + a.Type) + m = m.PlainText(" - " + formatDescription(a)) + m = m.LF() + } + return m +} + +func buildAttributeTable(m *md.Markdown, attributes []attributeNode) *md.Markdown { + return buildCustomAttributeTable(m, attributes) + + // Rows below are useful for debugging since it renders the table in a regular markdown format + + // rows := [][]string{} + // for _, n := range attributes { + // rows = append(rows, []string{fmt.Sprintf("`%s`", n.Title), n.Type, formatDescription(n.Description)}) + // } + // m = m.CustomTable(md.TableSet{ + // Header: []string{"Key", "Type", "Description"}, + // Rows: rows, + // }, md.TableOptions{AutoWrapText: false, AutoFormatHeaders: false}) + + // return m +} + +func formatDescription(a attributeNode) string { + s := strings.ReplaceAll(a.Description, "\n", " ") + if a.Reference != "" { + if strings.HasSuffix(s, ".") { + s += " " + } else if s != "" { + s += ". 
" + } + s += fmt.Sprintf("See %s.", md.Link("_", "#"+a.Reference)) + } + return s +} diff --git a/bundle/internal/docs/docs.go b/bundle/internal/docs/nodes.go similarity index 50% rename from bundle/internal/docs/docs.go rename to bundle/internal/docs/nodes.go index de32c118f0..14426f8b84 100644 --- a/bundle/internal/docs/docs.go +++ b/bundle/internal/docs/nodes.go @@ -1,15 +1,10 @@ package main import ( - "fmt" - "log" - "os" "sort" "strings" "github.com/databricks/cli/libs/jsonschema" - - md "github.com/nao1215/markdown" ) type rootNode struct { @@ -84,7 +79,6 @@ func getNodes(s jsonschema.Schema, refs map[string]jsonschema.Schema, customFiel if v.Items != nil { arrayItemType := resolveRefs(v.Items, refs) node.ArrayItemAttributes = getAttributes(arrayItemType.Properties, refs, k) - // rootProps = append(rootProps, extractNodes(k, arrayItemType.Properties, refs, customFields)...) } isEmpty := len(node.Attributes) == 0 && len(node.ObjectKeyAttributes) == 0 && len(node.ArrayItemAttributes) == 0 @@ -100,60 +94,6 @@ func getNodes(s jsonschema.Schema, refs map[string]jsonschema.Schema, customFiel return nodes } -func buildMarkdown(nodes []rootNode, outputFile, header string) error { - f, err := os.Create(outputFile) - if err != nil { - log.Fatal(err) - } - defer f.Close() - - m := md.NewMarkdown(f) - m = m.PlainText(header) - for _, node := range nodes { - m = m.LF() - if node.TopLevel { - m = m.H2(node.Title) - } else { - m = m.H3(node.Title) - } - m = m.LF() - - if node.Type != "" { - m = m.PlainText(fmt.Sprintf("**`Type: %s`**", node.Type)) - m = m.LF() - } - m = m.PlainText(node.Description) - m = m.LF() - - if len(node.ObjectKeyAttributes) > 0 { - n := removePluralForm(node.Title) - m = m.CodeBlocks("yaml", fmt.Sprintf("%ss:\n <%s-name>:\n <%s-field-name>: <%s-field-value>", n, n, n, n)) - m = m.LF() - m = buildAttributeTable(m, node.ObjectKeyAttributes) - } else if len(node.ArrayItemAttributes) > 0 { - m = m.LF() - m = buildAttributeTable(m, node.ArrayItemAttributes) - } else if len(node.Attributes) > 0 { - m = m.LF() - m = buildAttributeTable(m, node.Attributes) - } - - if node.Example != "" { - m = m.LF() - m = m.PlainText("**Example**") - m = m.LF() - m = m.PlainText(node.Example) - } - } - - err = m.Build() - if err != nil { - log.Fatal(err) - } - - return nil -} - func removePluralForm(s string) string { if strings.HasSuffix(s, "s") { return strings.TrimSuffix(s, "s") @@ -161,57 +101,6 @@ func removePluralForm(s string) string { return s } -func buildAttributeTable(m *md.Markdown, attributes []attributeNode) *md.Markdown { - return buildCustomAttributeTable(m, attributes) - - // Rows below are useful for debugging since it renders the table in a regular markdown format - - // rows := [][]string{} - // for _, n := range attributes { - // rows = append(rows, []string{fmt.Sprintf("`%s`", n.Title), n.Type, formatDescription(n.Description)}) - // } - // m = m.CustomTable(md.TableSet{ - // Header: []string{"Key", "Type", "Description"}, - // Rows: rows, - // }, md.TableOptions{AutoWrapText: false, AutoFormatHeaders: false}) - - // return m -} - -func formatDescription(a attributeNode) string { - s := strings.ReplaceAll(a.Description, "\n", " ") - if a.Reference != "" { - if strings.HasSuffix(s, ".") { - s += " " - } else if s != "" { - s += ". 
" - } - s += fmt.Sprintf("See %s.", md.Link("_", "#"+a.Reference)) - } - return s -} - -// Build a custom table which we use in Databricks website -func buildCustomAttributeTable(m *md.Markdown, attributes []attributeNode) *md.Markdown { - m = m.LF() - m = m.PlainText(".. list-table::") - m = m.PlainText(" :header-rows: 1") - m = m.LF() - - m = m.PlainText(" * - Key") - m = m.PlainText(" - Type") - m = m.PlainText(" - Description") - m = m.LF() - - for _, a := range attributes { - m = m.PlainText(" * - " + fmt.Sprintf("`%s`", a.Title)) - m = m.PlainText(" - " + a.Type) - m = m.PlainText(" - " + formatDescription(a)) - m = m.LF() - } - return m -} - func getHumanReadableType(t jsonschema.Type) string { typesMapping := map[string]string{ "string": "String", @@ -248,35 +137,6 @@ func getAttributes(props map[string]*jsonschema.Schema, refs map[string]jsonsche return attributes } -func isReferenceType(v *jsonschema.Schema, refs map[string]jsonschema.Schema) bool { - if len(v.Properties) > 0 { - return true - } - if v.Items != nil { - items := resolveRefs(v.Items, refs) - if items != nil && items.Type == "object" { - return true - } - } - props := resolveAdditionaProperties(v, refs) - if props != nil && props.Type == "object" { - return true - } - - return false -} - -func resolveAdditionaProperties(v *jsonschema.Schema, refs map[string]jsonschema.Schema) *jsonschema.Schema { - if v.AdditionalProperties == nil { - return nil - } - additionalProps, ok := v.AdditionalProperties.(*jsonschema.Schema) - if !ok { - return nil - } - return resolveRefs(additionalProps, refs) -} - func getDescription(s *jsonschema.Schema, allowMarkdown bool) string { if allowMarkdown && s.MarkdownDescription != "" { return s.MarkdownDescription @@ -284,40 +144,6 @@ func getDescription(s *jsonschema.Schema, allowMarkdown bool) string { return s.Description } -func resolveRefs(s *jsonschema.Schema, schemas map[string]jsonschema.Schema) *jsonschema.Schema { - node := s - - description := s.Description - markdownDescription := s.MarkdownDescription - examples := s.Examples - - for node.Reference != nil { - ref := strings.TrimPrefix(*node.Reference, "#/$defs/") - newNode, ok := schemas[ref] - if !ok { - log.Printf("schema %s not found", ref) - } - - if description == "" { - description = newNode.Description - } - if markdownDescription == "" { - markdownDescription = newNode.MarkdownDescription - } - if len(examples) == 0 { - examples = newNode.Examples - } - - node = &newNode - } - - node.Description = description - node.MarkdownDescription = markdownDescription - node.Examples = examples - - return node -} - func shouldExtract(ref string, customFields map[string]bool) bool { if i := strings.Index(ref, "github.com"); i >= 0 { ref = ref[i:] diff --git a/bundle/internal/docs/refs.go b/bundle/internal/docs/refs.go new file mode 100644 index 0000000000..520a6e2c5a --- /dev/null +++ b/bundle/internal/docs/refs.go @@ -0,0 +1,71 @@ +package main + +import ( + "log" + "strings" + + "github.com/databricks/cli/libs/jsonschema" +) + +func isReferenceType(v *jsonschema.Schema, refs map[string]jsonschema.Schema) bool { + if len(v.Properties) > 0 { + return true + } + if v.Items != nil { + items := resolveRefs(v.Items, refs) + if items != nil && items.Type == "object" { + return true + } + } + props := resolveAdditionaProperties(v, refs) + if props != nil && props.Type == "object" { + return true + } + + return false +} + +func resolveAdditionaProperties(v *jsonschema.Schema, refs map[string]jsonschema.Schema) *jsonschema.Schema { + if 
v.AdditionalProperties == nil { + return nil + } + additionalProps, ok := v.AdditionalProperties.(*jsonschema.Schema) + if !ok { + return nil + } + return resolveRefs(additionalProps, refs) +} + +func resolveRefs(s *jsonschema.Schema, schemas map[string]jsonschema.Schema) *jsonschema.Schema { + node := s + + description := s.Description + markdownDescription := s.MarkdownDescription + examples := s.Examples + + for node.Reference != nil { + ref := strings.TrimPrefix(*node.Reference, "#/$defs/") + newNode, ok := schemas[ref] + if !ok { + log.Printf("schema %s not found", ref) + } + + if description == "" { + description = newNode.Description + } + if markdownDescription == "" { + markdownDescription = newNode.MarkdownDescription + } + if len(examples) == 0 { + examples = newNode.Examples + } + + node = &newNode + } + + node.Description = description + node.MarkdownDescription = markdownDescription + node.Examples = examples + + return node +} From 0bd7b524dbd9bcd1d4edffefe19a81bf8c753b43 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Fri, 10 Jan 2025 19:26:45 +0100 Subject: [PATCH 22/26] fix: Invalid refrences --- bundle/internal/docs/nodes.go | 10 +++--- bundle/internal/docs/refs.go | 35 +++++++++++++++---- .../schema/annotations_openapi_overrides.yml | 3 ++ bundle/schema/jsonschema.json | 19 +++++----- 4 files changed, 45 insertions(+), 22 deletions(-) diff --git a/bundle/internal/docs/nodes.go b/bundle/internal/docs/nodes.go index 14426f8b84..4047a690bf 100644 --- a/bundle/internal/docs/nodes.go +++ b/bundle/internal/docs/nodes.go @@ -59,7 +59,7 @@ func getNodes(s jsonschema.Schema, refs map[string]jsonschema.Schema, customFiel Type: getHumanReadableType(v.Type), } - node.Attributes = getAttributes(v.Properties, refs, k) + node.Attributes = getAttributes(v.Properties, refs, customFields, k) rootProps = append(rootProps, extractNodes(k, v.Properties, refs, customFields)...) additionalProps, ok := v.AdditionalProperties.(*jsonschema.Schema) @@ -72,13 +72,13 @@ func getNodes(s jsonschema.Schema, refs map[string]jsonschema.Schema, customFiel if len(node.Example) == 0 { node.Example = getExample(objectKeyType) } - node.ObjectKeyAttributes = getAttributes(objectKeyType.Properties, refs, k) + node.ObjectKeyAttributes = getAttributes(objectKeyType.Properties, refs, customFields, k) rootProps = append(rootProps, extractNodes(k, objectKeyType.Properties, refs, customFields)...) } if v.Items != nil { arrayItemType := resolveRefs(v.Items, refs) - node.ArrayItemAttributes = getAttributes(arrayItemType.Properties, refs, k) + node.ArrayItemAttributes = getAttributes(arrayItemType.Properties, refs, customFields, k) } isEmpty := len(node.Attributes) == 0 && len(node.ObjectKeyAttributes) == 0 && len(node.ArrayItemAttributes) == 0 @@ -112,7 +112,7 @@ func getHumanReadableType(t jsonschema.Type) string { return typesMapping[string(t)] } -func getAttributes(props map[string]*jsonschema.Schema, refs map[string]jsonschema.Schema, prefix string) []attributeNode { +func getAttributes(props map[string]*jsonschema.Schema, refs map[string]jsonschema.Schema, customFields map[string]bool, prefix string) []attributeNode { attributes := []attributeNode{} for k, v := range props { v = resolveRefs(v, refs) @@ -121,7 +121,7 @@ func getAttributes(props map[string]*jsonschema.Schema, refs map[string]jsonsche typeString = "Any" } var reference string - if isReferenceType(v, refs) { + if isReferenceType(v, refs, customFields) { reference = prefix + "." 
+ k } attributes = append(attributes, attributeNode{ diff --git a/bundle/internal/docs/refs.go b/bundle/internal/docs/refs.go index 520a6e2c5a..11651041b7 100644 --- a/bundle/internal/docs/refs.go +++ b/bundle/internal/docs/refs.go @@ -7,7 +7,10 @@ import ( "github.com/databricks/cli/libs/jsonschema" ) -func isReferenceType(v *jsonschema.Schema, refs map[string]jsonschema.Schema) bool { +func isReferenceType(v *jsonschema.Schema, refs map[string]jsonschema.Schema, customFields map[string]bool) bool { + if v.Type != "object" && v.Type != "array" { + return false + } if len(v.Properties) > 0 { return true } @@ -17,15 +20,26 @@ func isReferenceType(v *jsonschema.Schema, refs map[string]jsonschema.Schema) bo return true } } - props := resolveAdditionaProperties(v, refs) - if props != nil && props.Type == "object" { - return true + props := resolveAdditionaProperties(v) + if !isInOwnFields(props, customFields) { + return false + } + if props != nil { + propsResolved := resolveRefs(props, refs) + return propsResolved.Type == "object" } return false } -func resolveAdditionaProperties(v *jsonschema.Schema, refs map[string]jsonschema.Schema) *jsonschema.Schema { +func isInOwnFields(node *jsonschema.Schema, customFields map[string]bool) bool { + if node != nil && node.Reference != nil { + return customFields[getRefType(node)] + } + return true +} + +func resolveAdditionaProperties(v *jsonschema.Schema) *jsonschema.Schema { if v.AdditionalProperties == nil { return nil } @@ -33,7 +47,7 @@ func resolveAdditionaProperties(v *jsonschema.Schema, refs map[string]jsonschema if !ok { return nil } - return resolveRefs(additionalProps, refs) + return additionalProps } func resolveRefs(s *jsonschema.Schema, schemas map[string]jsonschema.Schema) *jsonschema.Schema { @@ -44,7 +58,7 @@ func resolveRefs(s *jsonschema.Schema, schemas map[string]jsonschema.Schema) *js examples := s.Examples for node.Reference != nil { - ref := strings.TrimPrefix(*node.Reference, "#/$defs/") + ref := getRefType(node) newNode, ok := schemas[ref] if !ok { log.Printf("schema %s not found", ref) @@ -69,3 +83,10 @@ func resolveRefs(s *jsonschema.Schema, schemas map[string]jsonschema.Schema) *js return node } + +func getRefType(node *jsonschema.Schema) string { + if node.Reference == nil { + return "" + } + return strings.TrimPrefix(*node.Reference, "#/$defs/") +} diff --git a/bundle/internal/schema/annotations_openapi_overrides.yml b/bundle/internal/schema/annotations_openapi_overrides.yml index 36314b777b..99ea27c625 100644 --- a/bundle/internal/schema/annotations_openapi_overrides.yml +++ b/bundle/internal/schema/annotations_openapi_overrides.yml @@ -124,6 +124,9 @@ github.com/databricks/cli/bundle/config/resources.MlflowExperiment: "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.MlflowModel: + "_": + "markdown_description": |- + The model resource allows you to define [legacy models](/api/workspace/modelregistry/createmodel) in bundles. Databricks recommends you use [registered models](#registered-model) instead. 
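The helpers collected in refs.go above resolve `#/$defs/...` pointers by walking the reference chain and keeping the first non-empty description found along the way. A self-contained sketch of that idea, using toy types rather than the real `libs/jsonschema` package:

```go
package main

import (
	"fmt"
	"strings"
)

// Toy stand-ins for the schema fields the resolver cares about.
type schema struct {
	Reference   *string
	Description string
}

// resolve follows "#/$defs/..." references until it reaches a concrete
// definition, preferring the outermost non-empty description.
func resolve(s schema, defs map[string]schema) schema {
	desc := s.Description
	node := s
	for node.Reference != nil {
		node = defs[strings.TrimPrefix(*node.Reference, "#/$defs/")]
		if desc == "" {
			desc = node.Description
		}
	}
	node.Description = desc
	return node
}

func main() {
	ref := "#/$defs/github.com/databricks/cli/bundle/config.Git"
	defs := map[string]schema{
		"github.com/databricks/cli/bundle/config.Git": {Description: "Git settings."},
	}
	fmt.Println(resolve(schema{Reference: &ref}, defs).Description) // Git settings.
}
```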
"permissions": "description": |- PLACEHOLDER diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 990a2f133f..ec721c1406 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -461,7 +461,8 @@ "$ref": "#/$defs/string" } }, - "additionalProperties": false + "additionalProperties": false, + "markdownDescription": "The model resource allows you to define [legacy models](https://docs.databricks.com/api/workspace/modelregistry/createmodel) in bundles. Databricks recommends you use \u003cUC\u003e [registered models](https://docs.databricks.com/dev-tools/bundles/reference.html#registered-model) instead." }, { "type": "string", @@ -941,6 +942,7 @@ }, "variable.Variable": { "type": "object", + "description": "Defines a custom variable for the bundle.", "properties": { "default": { "$ref": "#/$defs/interface" @@ -959,7 +961,8 @@ "$ref": "#/$defs/github.com/databricks/cli/bundle/config/variable.VariableType" } }, - "additionalProperties": false + "additionalProperties": false, + "markdownDescription": "Defines a custom variable for the bundle. See [variables](https://docs.databricks.com/dev-tools/bundles/settings.html#variables)." }, "variable.VariableType": { "type": "string" @@ -1088,8 +1091,7 @@ }, "lock": { "description": "The deployment lock attributes.", - "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Lock", - "markdownDescription": "The deployment lock attributes. See [lock](https://docs.databricks.com/dev-tools/bundles/reference.html#lock)." + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Lock" } }, "additionalProperties": false @@ -1379,8 +1381,7 @@ "properties": { "artifacts": { "description": "The artifacts to include in the target deployment.", - "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Artifact", - "markdownDescription": "The artifacts to include in the target deployment. See [artifact](https://docs.databricks.com/dev-tools/bundles/reference.html#artifact)" + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Artifact" }, "bundle": { "description": "The bundle attributes when deploying to this target.", @@ -1426,13 +1427,11 @@ }, "sync": { "description": "The local paths to sync to the target workspace when a bundle is run or deployed.", - "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Sync", - "markdownDescription": "The local paths to sync to the target workspace when a bundle is run or deployed. See [sync](https://docs.databricks.com/dev-tools/bundles/reference.html#sync)." + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Sync" }, "variables": { "description": "The custom variable definitions for the target.", - "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/variable.TargetVariable", - "markdownDescription": "The custom variable definitions for the target. See [link](https://docs.databricks.com/dev-tools/bundles/variables.html)." 
+ "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/variable.TargetVariable" }, "workspace": { "description": "The Databricks workspace for the target.", From 880a4cf49e44c2ac3451ef283eac453ed989602b Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Fri, 10 Jan 2025 19:29:16 +0100 Subject: [PATCH 23/26] fix: Few extra links --- bundle/internal/schema/annotations.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index f7e1aca962..8ac789cc7b 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -38,12 +38,12 @@ github.com/databricks/cli/bundle/config.Bundle: "description": |- The definition of the bundle deployment "markdown_description": |- - The definition of the bundle deployment. For supported attributes, see [_](#deployment) and [_](/dev-tools/bundles/deployment-modes.md). + The definition of the bundle deployment. For supported attributes see [_](/dev-tools/bundles/deployment-modes.md). "git": "description": |- The Git version control details that are associated with your bundle. "markdown_description": |- - The Git version control details that are associated with your bundle. For supported attributes, see [_](#git) and [_](/dev-tools/bundles/settings.md#git). + The Git version control details that are associated with your bundle. For supported attributes see [_](/dev-tools/bundles/settings.md#git). "name": "description": |- The name of the bundle. @@ -337,8 +337,6 @@ github.com/databricks/cli/bundle/config.Target: "workspace": "description": |- The Databricks workspace for the target. - "markdown_description": |- - The Databricks workspace for the target. [_](#workspace) github.com/databricks/cli/bundle/config.Workspace: "artifact_path": "description": |- From c546604cc4d8176c213dac8c347db13d597b3c72 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Fri, 10 Jan 2025 19:29:43 +0100 Subject: [PATCH 24/26] fix: Schema bump --- bundle/schema/jsonschema.json | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index ec721c1406..50f1c6f956 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -1051,12 +1051,12 @@ "deployment": { "description": "The definition of the bundle deployment", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Deployment", - "markdownDescription": "The definition of the bundle deployment. For supported attributes, see [deployment](https://docs.databricks.com/dev-tools/bundles/reference.html#deployment) and [link](https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)." + "markdownDescription": "The definition of the bundle deployment. For supported attributes see [link](https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)." }, "git": { "description": "The Git version control details that are associated with your bundle.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Git", - "markdownDescription": "The Git version control details that are associated with your bundle. For supported attributes, see [git](https://docs.databricks.com/dev-tools/bundles/reference.html#git) and [git](https://docs.databricks.com/dev-tools/bundles/settings.html#git)." + "markdownDescription": "The Git version control details that are associated with your bundle. For supported attributes see [git](https://docs.databricks.com/dev-tools/bundles/settings.html#git)." 
}, "name": { "description": "The name of the bundle.", @@ -1435,8 +1435,7 @@ }, "workspace": { "description": "The Databricks workspace for the target.", - "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Workspace", - "markdownDescription": "The Databricks workspace for the target. [workspace](https://docs.databricks.com/dev-tools/bundles/reference.html#workspace)" + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Workspace" } }, "additionalProperties": false From 90cafad8c8685fe16e070af86bdccd41d98359a6 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Fri, 10 Jan 2025 19:57:02 +0100 Subject: [PATCH 25/26] fix: Allow nodes with only description --- bundle/internal/docs/nodes.go | 2 +- bundle/internal/schema/annotations.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bundle/internal/docs/nodes.go b/bundle/internal/docs/nodes.go index 4047a690bf..4507de08a3 100644 --- a/bundle/internal/docs/nodes.go +++ b/bundle/internal/docs/nodes.go @@ -81,7 +81,7 @@ func getNodes(s jsonschema.Schema, refs map[string]jsonschema.Schema, customFiel node.ArrayItemAttributes = getAttributes(arrayItemType.Properties, refs, customFields, k) } - isEmpty := len(node.Attributes) == 0 && len(node.ObjectKeyAttributes) == 0 && len(node.ArrayItemAttributes) == 0 + isEmpty := node.Description == "" && len(node.Attributes) == 0 && len(node.ObjectKeyAttributes) == 0 && len(node.ArrayItemAttributes) == 0 shouldAddNode := !isEmpty || node.TopLevel if shouldAddNode { nodes = append(nodes, node) diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index 8ac789cc7b..6b5d873b3f 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -480,7 +480,7 @@ github.com/databricks/cli/bundle/config/variable.Variable: "description": |- The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID. "markdown_description": |- - The name of the `alert`, `cluster_policy`, `cluster`, `dashboard`, `instance_pool`, `job`, `metastore`, `pipeline`, `query`, `service_principal`, or `warehouse` object for which to retrieve an ID." + The name of the `alert`, `cluster_policy`, `cluster`, `dashboard`, `instance_pool`, `job`, `metastore`, `pipeline`, `query`, `service_principal`, or `warehouse` object for which to retrieve an ID. "type": "description": |- The type of the variable. From fd88e4c97ae9f93579096057c3e3f11eaa43ea11 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Fri, 10 Jan 2025 20:35:44 +0100 Subject: [PATCH 26/26] fix: Use markdown from original pages --- bundle/internal/docs/main.go | 24 --------- bundle/internal/docs/markdown.go | 88 ++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 24 deletions(-) diff --git a/bundle/internal/docs/main.go b/bundle/internal/docs/main.go index b3e95db68d..e4e603b1d2 100644 --- a/bundle/internal/docs/main.go +++ b/bundle/internal/docs/main.go @@ -13,30 +13,6 @@ import ( "github.com/databricks/cli/libs/jsonschema" ) -const ( - rootFileName = "reference.md" - rootHeader = `--- -description: Configuration reference for databricks.yml ---- - -# Configuration reference - -This article provides reference for keys supported by configuration (YAML). See [\_](/dev-tools/bundles/index.md). 
-` -) - -const ( - resourcesFileName = "resources-reference.md" - resourcesHeader = `--- -description: Resources references for databricks.yml ---- - -# Resources reference - -This article provides reference for keys supported by configuration (YAML). See [\_](/dev-tools/bundles/index.md). -` -) - func main() { if len(os.Args) != 3 { fmt.Println("Usage: go run main.go ") diff --git a/bundle/internal/docs/markdown.go b/bundle/internal/docs/markdown.go index 3bb38ab436..508f63578b 100644 --- a/bundle/internal/docs/markdown.go +++ b/bundle/internal/docs/markdown.go @@ -9,6 +9,94 @@ import ( md "github.com/nao1215/markdown" ) +const ( + rootFileName = "reference.md" + rootHeader = `--- +description: Configuration reference for databricks.yml +--- + +# Configuration reference + +This article provides reference for keys supported by configuration (YAML). See [_](/dev-tools/bundles/index.md). + +For complete bundle examples, see [_](/dev-tools/bundles/resource-examples.md) and the [bundle-examples GitHub repository](https://github.com/databricks/bundle-examples). +` +) + +const ( + resourcesFileName = "resources.md" + resourcesHeader = `--- +description: Learn about resources supported by Databricks Asset Bundles and how to configure them. +--- + +# resources + + allows you to specify information about the resources used by the bundle in the ` + "`" + `resources` + "`" + ` mapping in the bundle configuration. See [resources mapping](/dev-tools/bundles/settings.md#resources) and [resources key reference](/dev-tools/bundles/reference.md#resources). + +This article outlines supported resource types for bundles and provides details and an example for each supported type. For additional examples, see [_](/dev-tools/bundles/resource-examples.md). + +## Supported resources + +The following table lists supported resource types for bundles. Some resources can be created by defining them in a bundle and deploying the bundle, and some resources only support referencing an existing resource to include in the bundle. + +Resources are defined using the corresponding [Databricks REST API](/api/workspace/introduction) object's create operation request payload, where the object's supported fields, expressed as YAML, are the resource's supported properties. Links to documentation for each resource's corresponding payloads are listed in the table. + +.. tip:: The ` + "`" + `databricks bundle validate` + "`" + ` command returns warnings if unknown resource properties are found in bundle configuration files. + + +.. 
list-table:: + :header-rows: 1 + + * - Resource + - Create support + - Corresponding REST API object + + * - [cluster](#cluster) + - ✓ + - [Cluster object](/api/workspace/clusters/create) + + * - [dashboard](#dashboard) + - + - [Dashboard object](/api/workspace/lakeview/create) + + * - [experiment](#experiment) + - ✓ + - [Experiment object](/api/workspace/experiments/createexperiment) + + * - [job](#job) + - ✓ + - [Job object](/api/workspace/jobs/create) + + * - [model (legacy)](#model-legacy) + - ✓ + - [Model (legacy) object](/api/workspace/modelregistry/createmodel) + + * - [model_serving_endpoint](#model-serving-endpoint) + - ✓ + - [Model serving endpoint object](/api/workspace/servingendpoints/create) + + * - [pipeline](#pipeline) + - ✓ + - [Pipeline object]](/api/workspace/pipelines/create) + + * - [quality_monitor](#quality-monitor) + - ✓ + - [Quality monitor object](/api/workspace/qualitymonitors/create) + + * - [registered_model](#registered-model) () + - ✓ + - [Registered model object](/api/workspace/registeredmodels/create) + + * - [schema](#schema) () + - ✓ + - [Schema object](/api/workspace/schemas/create) + + * - [volume](#volume) () + - ✓ + - [Volume object](/api/workspace/volumes/create) +` +) + func buildMarkdown(nodes []rootNode, outputFile, header string) error { f, err := os.Create(outputFile) if err != nil {
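For the generated resource pages, buildMarkdown pairs each top-level map node with a YAML placeholder block derived from the node title. A small illustrative sketch of that expansion for `jobs`; the indentation here is approximate, and the authoritative template is the format string in markdown.go:

```go
package main

import (
	"fmt"
	"strings"
)

// removePluralForm mirrors the helper in nodes.go: "jobs" -> "job".
func removePluralForm(s string) string {
	return strings.TrimSuffix(s, "s")
}

func main() {
	n := removePluralForm("jobs")
	// Approximate expansion of the placeholder block emitted for a
	// top-level map node.
	fmt.Printf("%ss:\n  <%s-name>:\n    <%s-field-name>: <%s-field-value>\n", n, n, n, n)
	// jobs:
	//   <job-name>:
	//     <job-field-name>: <job-field-value>
}
```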