From f8dccc54676e5c949ca435de6bc475859597fdc5 Mon Sep 17 00:00:00 2001 From: mpreibisch Date: Thu, 2 Jan 2025 18:00:24 -0800 Subject: [PATCH] Introduce Opentelemetry --- Cargo.lock | 330 +++++++++++++++++++++++++++++++++++++++++-- attest/src/main.rs | 8 ++ telemetry/Cargo.toml | 10 +- telemetry/src/lib.rs | 175 +++++++++++++++++++++-- 4 files changed, 495 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d33d90ed..8711e00a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1018,6 +1018,34 @@ dependencies = [ "tracing", ] +[[package]] +name = "axum" +version = "0.6.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b829e4e32b91e643de6eafe82b1d90675f5874230191a4ffbc1b336dec4d6bf" +dependencies = [ + "async-trait", + "axum-core 0.3.4", + "bitflags 1.2.1", + "bytes", + "futures-util", + "http 0.2.11", + "http-body 0.4.6", + "hyper 0.14.28", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper 0.1.2", + "tower", + "tower-layer", + "tower-service", +] + [[package]] name = "axum" version = "0.7.5" @@ -1025,7 +1053,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" dependencies = [ "async-trait", - "axum-core", + "axum-core 0.4.5", "bytes", "futures-util", "http 1.2.0", @@ -1039,12 +1067,29 @@ dependencies = [ "pin-project-lite", "rustversion", "serde", - "sync_wrapper", + "sync_wrapper 1.0.1", "tower", "tower-layer", "tower-service", ] +[[package]] +name = "axum-core" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http 0.2.11", + "http-body 0.4.6", + "mime", + "rustversion", + "tower-layer", + "tower-service", +] + [[package]] name = "axum-core" version = "0.4.5" @@ -1060,7 +1105,7 @@ dependencies = [ "mime", "pin-project-lite", "rustversion", - "sync_wrapper", + "sync_wrapper 1.0.1", "tower-layer", "tower-service", ] @@ -1642,7 +1687,7 @@ dependencies = [ "futures-core", "prost 0.13.3", "prost-types 0.13.3", - "tonic", + "tonic 0.12.3", "tracing-core", ] @@ -1666,7 +1711,7 @@ dependencies = [ "thread_local", "tokio", "tokio-stream", - "tonic", + "tonic 0.12.3", "tracing", "tracing-core", "tracing-subscriber", @@ -3504,7 +3549,7 @@ dependencies = [ "portable-atomic", "tokio", "unicode-width 0.2.0", - "web-time", + "web-time 1.1.0", ] [[package]] @@ -4626,6 +4671,156 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "opentelemetry" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e32339a5dc40459130b3bd269e9892439f55b33e772d2a9d402a789baaf4e8a" +dependencies = [ + "futures-core", + "futures-sink", + "indexmap 2.7.0", + "js-sys", + "once_cell", + "pin-project-lite", + "thiserror 1.0.65", + "urlencoding", +] + +[[package]] +name = "opentelemetry" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "900d57987be3f2aeb70d385fff9b27fb74c5723cc9a52d904d4f9c807a0667bf" +dependencies = [ + "futures-core", + "futures-sink", + "js-sys", + "once_cell", + "pin-project-lite", + "thiserror 1.0.65", + "urlencoding", +] + +[[package]] +name = "opentelemetry-datadog" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "157dbb3739d0a29158aae6c94a43aa842e1f07e6205992ca92a32005e4e77c5d" +dependencies = [ + "futures-core", + "http 0.2.11", + "indexmap 2.7.0", + "itertools 0.11.0", + "once_cell", + "opentelemetry 0.22.0", + "opentelemetry-http", + "opentelemetry-semantic-conventions 0.14.0", + "opentelemetry_sdk 0.22.1", + "rmp", + "thiserror 1.0.65", + "url", +] + +[[package]] +name = "opentelemetry-http" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7690dc77bf776713848c4faa6501157469017eaf332baccd4eb1cea928743d94" +dependencies = [ + "async-trait", + "bytes", + "http 0.2.11", + "opentelemetry 0.22.0", +] + +[[package]] +name = "opentelemetry-otlp" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f24cda83b20ed2433c68241f918d0f6fdec8b1d43b7a9590ab4420c5095ca930" +dependencies = [ + "async-trait", + "futures-core", + "http 0.2.11", + "opentelemetry 0.21.0", + "opentelemetry-proto", + "opentelemetry-semantic-conventions 0.13.0", + "opentelemetry_sdk 0.21.2", + "prost 0.11.9", + "thiserror 1.0.65", + "tokio", + "tonic 0.9.2", +] + +[[package]] +name = "opentelemetry-proto" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2e155ce5cc812ea3d1dffbd1539aed653de4bf4882d60e6e04dcf0901d674e1" +dependencies = [ + "opentelemetry 0.21.0", + "opentelemetry_sdk 0.21.2", + "prost 0.11.9", + "tonic 0.9.2", +] + +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5774f1ef1f982ef2a447f6ee04ec383981a3ab99c8e77a1a7b30182e65bbc84" +dependencies = [ + "opentelemetry 0.21.0", +] + +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9ab5bd6c42fb9349dcf28af2ba9a0667f697f9bdcca045d39f2cec5543e2910" + +[[package]] +name = "opentelemetry_sdk" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f16aec8a98a457a52664d69e0091bac3a0abd18ead9b641cb00202ba4e0efe4" +dependencies = [ + "async-trait", + "crossbeam-channel", + "futures-channel", + "futures-executor", + "futures-util", + "glob", + "once_cell", + "opentelemetry 0.21.0", + "ordered-float", + "percent-encoding", + "rand", + "thiserror 1.0.65", + "tokio", + "tokio-stream", +] + +[[package]] +name = "opentelemetry_sdk" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e90c7113be649e31e9a0f8b5ee24ed7a16923b322c3c5ab6367469c049d6b7e" +dependencies = [ + "async-trait", + "crossbeam-channel", + "futures-channel", + "futures-executor", + "futures-util", + "once_cell", + "opentelemetry 0.22.0", + "ordered-float", + "percent-encoding", + "rand", + "thiserror 1.0.65", +] + [[package]] name = "option-ext" version = "0.2.0" @@ -4638,7 +4833,7 @@ version = "0.2.10" dependencies = [ "color-eyre", "data-encoding", - "event-listener 2.5.3", + "event-listener 5.3.1", "eyre", "futures", "orb-attest-dbus", @@ -4967,8 +5162,13 @@ name = "orb-telemetry" version = "0.0.0" dependencies = [ "console-subscriber", + "opentelemetry 0.21.0", + "opentelemetry-datadog", + "opentelemetry-otlp", + "opentelemetry_sdk 0.21.2", "tracing", "tracing-journald", + "tracing-opentelemetry", "tracing-subscriber", ] @@ -5181,6 +5381,15 @@ dependencies = [ "zenoh", ] +[[package]] +name = "ordered-float" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" +dependencies = [ + "num-traits", +] + [[package]] name = "ordered-stream" version = "0.2.0" @@ -5691,6 +5900,16 @@ dependencies = [ "syn 2.0.90", ] +[[package]] +name = "prost" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd" +dependencies = [ + "bytes", + "prost-derive 0.11.9", +] + [[package]] name = "prost" version = "0.12.6" @@ -5732,6 +5951,19 @@ dependencies = [ "tempfile", ] +[[package]] +name = "prost-derive" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4" +dependencies = [ + "anyhow", + "itertools 0.10.5", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "prost-derive" version = "0.12.6" @@ -5752,7 +5984,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" dependencies = [ "anyhow", - "itertools 0.12.1", + "itertools 0.13.0", "proc-macro2", "quote", "syn 2.0.90", @@ -5862,7 +6094,7 @@ dependencies = [ "thiserror 2.0.6", "tinyvec", "tracing", - "web-time", + "web-time 1.1.0", ] [[package]] @@ -6190,7 +6422,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "sync_wrapper", + "sync_wrapper 1.0.1", "tokio", "tokio-rustls 0.26.1", "tokio-util", @@ -6313,6 +6545,17 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" +[[package]] +name = "rmp" +version = "0.8.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "228ed7c16fa39782c3b3468e974aec2795e9089153cd08ee2e9aefb3613334c4" +dependencies = [ + "byteorder", + "num-traits", + "paste", +] + [[package]] name = "rodio" version = "0.17.3" @@ -6483,7 +6726,7 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" dependencies = [ - "web-time", + "web-time 1.1.0", ] [[package]] @@ -7254,6 +7497,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + [[package]] name = "sync_wrapper" version = "1.0.1" @@ -7692,6 +7941,34 @@ dependencies = [ "winnow 0.6.20", ] +[[package]] +name = "tonic" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3082666a3a6433f7f511c7192923fa1fe07c69332d3c6a2e6bb040b569199d5a" +dependencies = [ + "async-trait", + "axum 0.6.20", + "base64 0.21.7", + "bytes", + "futures-core", + "futures-util", + "h2 0.3.26", + "http 0.2.11", + "http-body 0.4.6", + "hyper 0.14.28", + "hyper-timeout 0.4.1", + "percent-encoding", + "pin-project", + "prost 0.11.9", + "tokio", + "tokio-stream", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tonic" version = "0.12.3" @@ -7700,7 +7977,7 @@ checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" dependencies = [ "async-stream", "async-trait", - "axum", + "axum 0.7.5", "base64 0.22.1", "bytes", "h2 0.4.6", @@ -7840,6 +8117,24 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-opentelemetry" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c67ac25c5407e7b961fafc6f7e9aa5958fd297aada2d20fa2ae1737357e55596" +dependencies = [ + "js-sys", + "once_cell", + "opentelemetry 0.21.0", + "opentelemetry_sdk 0.21.2", + "smallvec", + "tracing", + "tracing-core", + "tracing-log", + "tracing-subscriber", + "web-time 0.2.4", +] + [[package]] name = "tracing-serde" version = "0.1.3" @@ -7865,6 +8160,7 @@ dependencies = [ "sharded-slab", "smallvec", "thread_local", + "time", "tracing", "tracing-core", "tracing-log", @@ -8320,6 +8616,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa30049b1c872b72c89866d458eae9f20380ab280ffd1b1e18df2d3e2d98cfe0" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "web-time" version = "1.1.0" diff --git a/attest/src/main.rs b/attest/src/main.rs index c19bbd83..d238878f 100644 --- a/attest/src/main.rs +++ b/attest/src/main.rs @@ -1,8 +1,16 @@ +use tracing::info; + #[tokio::main] async fn main() -> color_eyre::Result<()> { color_eyre::install()?; orb_telemetry::TelemetryConfig::new() .with_journald(orb_attest::SYSLOG_IDENTIFIER) + .with_opentelemetry( + Some(orb_attest::SYSLOG_IDENTIFIER.to_string()), + Some("1.0.0".to_string()), + Some("orb".to_string()) + ) .init(); + orb_attest::main().await } diff --git a/telemetry/Cargo.toml b/telemetry/Cargo.toml index 08104544..24e5a573 100644 --- a/telemetry/Cargo.toml +++ b/telemetry/Cargo.toml @@ -12,8 +12,14 @@ rust-version.workspace = true [dependencies] tracing-journald.workspace = true -tracing-subscriber.workspace = true -tracing.workspace = true +opentelemetry = { version = "0.21", features = ["trace"] } +opentelemetry-otlp = { version = "0.14", features = ["trace", "tonic"] } +opentelemetry_sdk = { version = "0.21", features = ["trace", "rt-tokio"] } +opentelemetry-datadog = "0.10" +tracing-opentelemetry = "0.22" +tracing-subscriber = { version = "0.3.18", features = ["env-filter", "json", "time"] } +tracing = "0.1.40" + [target.'cfg(tokio_unstable)'.dependencies] console-subscriber.workspace = true diff --git a/telemetry/src/lib.rs b/telemetry/src/lib.rs index 98bb1ecb..d1d8567b 100644 --- a/telemetry/src/lib.rs +++ b/telemetry/src/lib.rs @@ -2,13 +2,39 @@ use std::io::IsTerminal as _; use tracing::level_filters::LevelFilter; use tracing_subscriber::{ - layer::SubscriberExt as _, util::SubscriberInitExt as _, EnvFilter, + layer::SubscriberExt as _, + EnvFilter, }; +use opentelemetry::{global, KeyValue}; +use opentelemetry::trace::TracerProvider; +use opentelemetry_sdk::propagation::TraceContextPropagator; +use opentelemetry_otlp::WithExportConfig; +use opentelemetry_sdk::{ + trace::{self, Sampler}, + runtime::Tokio, + Resource, +}; +use tracing_subscriber::util::SubscriberInitExt; + +/// A struct controlling how telemetry will be configured (logging + optional OpenTelemetry). #[derive(Debug)] pub struct TelemetryConfig { syslog_identifier: Option, global_filter: EnvFilter, + + /// If true, enable OTLP tracing via OpenTelemetry. + use_otel: bool, + + /// The service name used in opentelemetry's `Resource`. + service_name: Option, + /// The service version used in opentelemetry's `Resource`. + service_version: Option, + /// The environment used in opentelemetry's `Resource` (e.g. "prod", "stage"). + environment: Option, + + /// If we create an OTEL `TracerProvider`, store it here for optional shutdown. + tracer_provider: Option, } impl TelemetryConfig { @@ -21,7 +47,16 @@ impl TelemetryConfig { syslog_identifier: None, global_filter: EnvFilter::builder() .with_default_directive(LevelFilter::INFO.into()) - .from_env_lossy(), + // Spans from dependencies are emitted only at the error level + .parse_lossy(format!( + "info,zbus=error,h2=error,hyper=error,tonic=error,tower_http=error,{}", + std::env::var("RUST_LOG").unwrap_or_default() + )), + use_otel: false, + service_name: None, + service_version: None, + environment: None, + tracer_provider: None, } } @@ -37,7 +72,7 @@ impl TelemetryConfig { } /// Override the global filter to a custom filter. - /// Only do this if actually necessary to deviate from the orb's defaults. + /// Only do this if you actually need to deviate from orb's defaults. #[must_use] pub fn with_global_filter(self, filter: EnvFilter) -> Self { Self { @@ -46,20 +81,112 @@ impl TelemetryConfig { } } - pub fn try_init(self) -> Result<(), tracing_subscriber::util::TryInitError> { + /// Enable OpenTelemetry/OTLP tracing. + /// You can optionally provide a service name, version, and environment. + /// If omitted, these will default to environment variables (`SERVICE_NAME`, `SERVICE_VERSION`, `ENVIRONMENT`) or hard-coded strings. + #[must_use] + pub fn with_opentelemetry( + mut self, + service_name: Option, + service_version: Option, + environment: Option, + ) -> Self { + self.use_otel = true; + self.service_name = service_name; + self.service_version = service_version; + self.environment = environment; + self + } + + /// Initialize the OpenTelemetry TracerProvider and set it globally, returning it for storing. + fn init_opentelemetry(&mut self) -> Result> { + // Fallback to environment variables if the user did not supply them. + let default_service_name = + std::env::var("SERVICE_NAME").unwrap_or_else(|_| "orb-software".to_string()); + let default_service_version = + std::env::var("SERVICE_VERSION").unwrap_or_else(|_| "0.1.0".to_string()); + let default_environment = + std::env::var("ENVIRONMENT").unwrap_or_else(|_| "orb".to_string()); + + let service_name = self + .service_name + .clone() + .unwrap_or(default_service_name); + let service_version = self + .service_version + .clone() + .unwrap_or(default_service_version); + let environment = self + .environment + .clone() + .unwrap_or(default_environment); + + // Build an OpenTelemetry Resource with service metadata + let resource = Resource::new(vec![ + KeyValue::new("service.name", service_name), + KeyValue::new("service.version", service_version), + KeyValue::new("deployment.environment", environment), + ]); + + // OTLP endpoint from env or fallback + let otlp_endpoint = std::env::var("OTEL_EXPORTER_OTLP_ENDPOINT") + .unwrap_or_else(|_| "http://localhost:4317".to_string()); + + let exporter = opentelemetry_otlp::new_exporter() + .tonic() + .with_endpoint(otlp_endpoint) + .build_span_exporter()?; + + let trace_config = trace::config() + .with_resource(resource) + .with_sampler(Sampler::AlwaysOn); + + let tracer_provider = opentelemetry_sdk::trace::TracerProvider::builder() + .with_config(trace_config) + .with_batch_exporter(exporter, Tokio) + .build(); + + // Set the global tracer provider + global::set_tracer_provider(tracer_provider.clone()); + + // Use W3C propagation + global::set_text_map_propagator(TraceContextPropagator::new()); + + Ok(tracer_provider) + } + + /// Try to initialize telemetry (journald/stderr + optional OTLP). + /// Returns an error if something goes wrong setting up the subscriber stack. + pub fn try_init(mut self) -> Result<(), tracing_subscriber::util::TryInitError> { + // 1. If OTLP was requested, set up the tracer provider + if self.use_otel { + match self.init_opentelemetry() { + Ok(provider) => { + self.tracer_provider = Some(provider); + } + Err(err) => { + eprintln!("Failed to initialize OTLP exporter: {err}"); + // Degrade gracefully to journald/stderr logs + } + } + } + + // 2. Base journald/stderr logging setup let registry = tracing_subscriber::registry(); - // The type is only there to get it to compile. + + // If tokio_unstable is enabled, we can gather runtime metrics let tokio_console_layer: Option = None; #[cfg(tokio_unstable)] let tokio_console_layer = console_subscriber::spawn(); - // Checking for a terminal helps detect if we are running under systemd. + + // If we're not attached to a terminal, assume journald is the intended output let journald_layer = if !std::io::stderr().is_terminal() { self.syslog_identifier.and_then(|syslog_identifier| { tracing_journald::layer() .inspect_err(|err| { eprintln!( "failed connecting to journald socket. \ - will write to stderr: {err}" + will write to stderr: {err}" ); }) .map(|layer| layer.with_syslog_identifier(syslog_identifier)) @@ -68,24 +195,44 @@ impl TelemetryConfig { } else { None }; + + // If journald is not available or we're in a TTY, fallback to stderr let stderr_layer = journald_layer .is_none() .then(|| tracing_subscriber::fmt::layer().with_writer(std::io::stderr)); - assert!(stderr_layer.is_some() || journald_layer.is_some()); + + // 3. If OTLP tracing is available, attach a tracing-opentelemetry layer + let otlp_layer = self.tracer_provider.as_ref().map(|provider| { + let tracer = provider.tracer("orb-telemetry"); + tracing_opentelemetry::layer().with_tracer(tracer) + }); + + // 4. Build the final subscriber registry .with(tokio_console_layer) .with(stderr_layer) .with(journald_layer) + .with(otlp_layer) .with(self.global_filter) .try_init() } - /// Initializes the telemetry config. Call this only once, at the beginning of the - /// program. - /// - /// Calling this more than once or when another tracing subscriber is registered - /// will cause a panic. + /// Initializes telemetry, panicking if something goes wrong. pub fn init(self) { - self.try_init().expect("failed to initialize orb-telemetry") + self.try_init().expect("failed to initialize orb-telemetry"); + } + + /// Optional shutdown hook to flush any pending OTLP spans. + /// For journald/stderr, it's usually not necessary. + pub fn shutdown_tracing(&self) { + if self.tracer_provider.is_some() { + // This ensures that spans are flushed before exit + global::shutdown_tracer_provider(); + } } } + +pub async fn shutdown_tracing() { + // Ensure all spans are flushed + global::shutdown_tracer_provider(); +} \ No newline at end of file