diff --git a/.chloggen/attribute_references.yaml b/.chloggen/attribute_references.yaml new file mode 100644 index 0000000000..8fb93c1a69 --- /dev/null +++ b/.chloggen/attribute_references.yaml @@ -0,0 +1,4 @@ +change_type: enhancement +component: specification +note: Defines "reference attribute" concept +issues: [1428] diff --git a/docs/general/attribute-naming.md b/docs/general/attribute-naming.md index cded4a49ca..21c8dce6d7 100644 --- a/docs/general/attribute-naming.md +++ b/docs/general/attribute-naming.md @@ -46,17 +46,23 @@ Names SHOULD follow these rules: indicate entity hierarchies. This purpose should primarily drive the decision about forming nested namespaces. +- The special namespaces `blob_ref.*` and `*.blob_ref.*` are reserved for + [Blob Reference Properties](./blob-reference-properties.md) and should not be used + except in the manner outlined in that specification. + - For each multi-word dot-delimited component of the attribute name separate the words by underscores (i.e. use snake_case). For example `http.response.status_code` denotes the status code in the http namespace. - Names SHOULD NOT coincide with namespaces. For example if - `service.instance.id` is an attribute name then it is no longer valid to have - an attribute named `service.instance` because `service.instance` is already a + `service.instance.id` is an attribute name, then it is no longer valid to have + an attribute named `service.instance`, because `service.instance` is already a namespace. Because of this rule be careful when choosing names: every existing name prohibits existence of an equally named namespace in the future, and vice versa: any existing namespace prohibits existence of an equally named - attribute key in the future. + attribute key in the future. Note that + [Blob Reference Properties](./blob-reference-properties.md) are exempt + from this rule, because they either are or replace the original attribute. ## Name Pluralization Guidelines @@ -173,5 +179,18 @@ and protocols. 
Any additions to the `otel.*` namespace MUST be approved as part of OpenTelemetry specification. +## Reference Attributes + +A "reference attribute" is a set of derived attribute names that are used to +provide the value of an attribute indirectly via a URI reference to a storage +system where the value of the attribute may be retrieved. + +In general, if there exists an attribute `somekey`, then there is implicitly +defined another attribute `somekey.blob_ref.uri` which may be used to provide +the value of the attribute `somekey` by reference to an external storage +system from which the value of `somekey` may be fetched. + +See [Blob Reference Properties](./blob-reference-properties.md) for details. + [DocumentStatus]: https://opentelemetry.io/docs/specs/otel/document-status diff --git a/docs/general/blob-reference-properties.md b/docs/general/blob-reference-properties.md new file mode 100644 index 0000000000..aecf53c0c1 --- /dev/null +++ b/docs/general/blob-reference-properties.md @@ -0,0 +1,54 @@ +# Blob Reference Properties + +This refers to a way for attributes and fields to carry a reference to +data stored in an external storage system. + +## Motivation + +Much like with the usage of pointers in programming, there are use cases +where it is preferable to reference data rather than to copy it. There are +situations where it is impractical or inconvenient for a signal to include +the full value rather than to supply a reference; for example, the data may +be too large to fit within the limits of a signal's operations backend. Or +there may be a situation in which the use of a reference is convenient for +applying a separate access control from that used for the signal data. + +## In OpenTelemetry Concepts + +A blob reference property can exist in attributes (e.g. in spans, logs, span events) +as well as in event bodies. 
This document will use the term "reference attributes" +when referring to the use of Blob Reference Properties in attributes, while the +term "reference fields" will be used to refer to Blob Reference Properties in body fields. + +## Minimal Requirement for a Blob Reference Property + +The key `blob_ref.uri` or `{prefix}.blob_ref.uri` must exist and contain a valid URI. The +URI is presumed to refer to the storage location from which the referenced data may be retrieved. +The URI can be of any format, including HTTP ('http://'), HTTPS ('https://'), Google Cloud Storage ('gs://'), +Amazon S3 ('s3://'), Azure Blob ('azblob://'), or any other general or vendor-specific URI. + +A key of the form `{prefix}.blob_ref.uri` indicates that the URI designates the location where the value +for the key named `{prefix}` has been stored. The prefix indicates a narrow scoping of the reference. + +A key of the form `blob_ref.uri` with no prefix indicates that the containing object (such as the `AnyValue` +used to store the event body fields) in its entirety has its true value at the given location. + +## Optional Metadata for Blob Reference Properties + +A key of the form `{prefix}.blob_ref.uri` may be accompanied by `{prefix}.blob_ref.metadata-key` (and similarly `blob_ref.uri` may be accompanied by `blob_ref.metadata-key`) for certain, well-defined metadata. + +The following metadata are defined and valid: + +- `[*.]blob_ref.content_type`: the MIME type of the data (e.g. `text/plain`, `application/json`, `application/octet-stream`) +- `[*.]blob_ref.size`: the size of the attribute value in bytes +- `[*.]blob_ref.hash_value`: a hash of the data for validation +- `[*.]blob_ref.hash_algorithm`: the algorithm used to compute the hash + +## Original Field/Key with a Reference + +If both a Blob Reference Property and its non-reference variant appear together +within a signal (e.g. 
both `somekey` and `somekey.blob_ref.uri` are present), +it should be assumed that only the storage location specified by reference +contains the full, complete, original value of the data; the non-reference +variant may be used to preview/summarize the data but should be assumed to +potentially contain a truncated, redacted, or otherwise non-original value. diff --git a/docs/general/events.md b/docs/general/events.md index 73bf87be5c..7acce59191 100644 --- a/docs/general/events.md +++ b/docs/general/events.md @@ -95,9 +95,24 @@ Recommendations on using attributes vs. body fields: requirements don't apply to event payload fields. * The definition for OpenTelemetry defined events supports describing individual _fields_ (Body Fields) - * The _fields_ are unique to the named event (`event.name`) and different events - may use the same _field_ name to represent different data, due to the unique - nature of the event. + + * The _fields_ are unique to the named event (`event.name`) and different events may use the same _field_ name to represent different data, due to the unique nature of the event. + + * The _fields_ SHOULD NOT use the `blob_ref.*` or `*.blob_ref.*` name pattern; these are reserved for [Blob Reference Properties](./blob-reference-properties.md). + + * The _fields_ of a named event (`event.name`) implicitly include both the defined fields for that type as well as their corresponding [reference representation](./blob-reference-properties.md). + +### Reference fields + +Fields of the body or the entire body may be represented as external references: + +* For any given field `somefield`, there exists a corresponding field `somefield.blob_ref.uri` which may be used to supply a reference to the value. 
+ +* The presence of the field `blob_ref.uri` as a top-level field in the body indicates that the full/original/true value of the entire body may be found at the URI specified by that field (with other fields potentially representing a subset or truncated/redacted copy). + +* For each `blob_ref.uri` or `prefix.blob_ref.uri` field, there may be optional corresponding `[*.]blob_ref.metadata-key` fields containing metadata about the reference such as its content type, size, etc. + +See [Blob Reference Properties](./blob-reference-properties.md) for more details. ## External event compatibility diff --git a/policies/blob_reference_property_reserved_segment.rego b/policies/blob_reference_property_reserved_segment.rego new file mode 100644 index 0000000000..17c6a8df22 --- /dev/null +++ b/policies/blob_reference_property_reserved_segment.rego @@ -0,0 +1,50 @@ +package after_resolution + +import rego.v1 + +reserved_segment_name = "blob_ref" +violation_category = "reserved_name_for_blob_reference_properties" +details_url = "https://github.com/open-telemetry/semantic-conventions/blob/main/docs/general/blob-reference-properties.md" + +deny contains violation if { + group := input.groups[_] + group.prefix != null + contains_segment(group.prefix, reserved_segment_name) + description := sprintf("Registry group '%s' with prefix '%s' contains illegal segment '%s'; reserved for Blob Reference Properties. For more details, see: %s", [group.id, group.prefix, reserved_segment_name, details_url]) + violation := group_level_refattr_violation(description, group.id) +} + +deny contains violation if { + some group in input.groups + some attr in group.attributes + attr.name != null + contains_segment(attr.name, reserved_segment_name) + description := sprintf("Attribute '%s' contains illegal segment '%s'; reserved for Blob Reference Properties. 
For more details, see: %s", [attr.name, reserved_segment_name, details_url]) + violation := attr_level_refattr_violation(description, group.id, attr.name) +} + +contains_segment(name, target_segment) if { + name_segments := split(name, ".") + some name_segment in name_segments + name_segment == target_segment +} + +group_level_refattr_violation(description, group_id) = violation if { + violation := { + "id": description, + "type": "semconv_attribute", + "category": violation_category, + "attr": "", + "group": group_id, + } +} + +attr_level_refattr_violation(description, group_id, attr_name) = violation if { + violation := { + "id": description, + "type": "semconv_attribute", + "category": violation_category, + "attr": attr_name, + "group": group_id, + } +} diff --git a/policies_test/blob_reference_property_reserved_segment_test.rego b/policies_test/blob_reference_property_reserved_segment_test.rego new file mode 100644 index 0000000000..95ba6bcfa1 --- /dev/null +++ b/policies_test/blob_reference_property_reserved_segment_test.rego @@ -0,0 +1,99 @@ +package after_resolution + +import rego.v1 + +test_group_with_blob_ref_exact if { + count(deny) > 0 with input as { + "groups": [ + { + "id": "registry.test", + "type": "attribute_group", + "prefix": "blob_ref", + "attributes": [{ + "name": "somekey" + }, + ] + } + ] + } +} + +test_group_with_blob_ref_prefix if { + count(deny) > 0 with input as { + "groups": [ + { + "id": "registry.test", + "type": "attribute_group", + "prefix": "blob_ref.foo", + "attributes": [{ + "name": "somekey" + }, + ] + } + ] + } +} + +test_group_with_blob_ref_internal_segment if { +count(deny) > 0 with input as { + "groups": [ + { + "id": "registry.test", + "type": "attribute_group", + "prefix": "foo.blob_ref.bar", + "attributes": [{ + "name": "somekey" + }, + ] + } + ] + } +} + +test_attr_with_blob_ref_exact if { + count(deny) > 0 with input as { + "groups": [ + { + "id": "registry.test", + "type": "attribute_group", + "prefix": 
"some.group.prefix", + "attributes": [{ + "name": "blob_ref" + }, + ] + } + ] + } +} + +test_attr_with_blob_ref_prefix if { + count(deny) > 0 with input as { + "groups": [ + { + "id": "registry.test", + "type": "attribute_group", + "prefix": "some.group.prefix", + "attributes": [{ + "name": "blob_ref.foo" + }, + ] + } + ] + } +} + +test_attr_with_blob_ref_internal_segment if { + count(deny) > 0 with input as { + "groups": [ + { + "id": "registry.test", + "type": "attribute_group", + "prefix": "some.group.prefix", + "attributes": [{ + "name": "foo.blob_ref.bar" + }, + ] + } + ] + } +}