From 6618c75d02b09cf9c9615e4c963a8312e7bed809 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Wed, 27 Sep 2023 01:50:17 +0000 Subject: [PATCH 01/39] WIP. Working API, limited metrics, missing tags for metrics --- CMakeLists.txt | 4 + src/datadog/datadog_agent.cpp | 118 ++++++++++++++++++++++-- src/datadog/datadog_agent.h | 9 +- src/datadog/metrics.cpp | 32 +++++++ src/datadog/metrics.h | 43 +++++++++ src/datadog/trace_segment.cpp | 15 ++++ src/datadog/trace_segment.h | 3 + src/datadog/tracer.cpp | 21 ++--- src/datadog/tracer.h | 2 + src/datadog/tracer_config.h | 3 + src/datadog/tracer_telemetry.cpp | 149 +++++++++++++++++++++++++++++++ src/datadog/tracer_telemetry.h | 47 ++++++++++ 12 files changed, 427 insertions(+), 19 deletions(-) create mode 100644 src/datadog/metrics.cpp create mode 100644 src/datadog/metrics.h create mode 100644 src/datadog/tracer_telemetry.cpp create mode 100644 src/datadog/tracer_telemetry.h diff --git a/CMakeLists.txt b/CMakeLists.txt index bad0080e..f97a22d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -110,6 +110,7 @@ target_sources(dd_trace_cpp-objects PRIVATE src/datadog/id_generator.cpp src/datadog/limiter.cpp src/datadog/logger.cpp + src/datadog/metrics.cpp src/datadog/msgpack.cpp src/datadog/null_collector.cpp src/datadog/parse_util.cpp @@ -127,6 +128,7 @@ target_sources(dd_trace_cpp-objects PRIVATE src/datadog/tags.cpp src/datadog/threaded_event_scheduler.cpp src/datadog/tracer_config.cpp + src/datadog/tracer_telemetry.cpp src/datadog/tracer.cpp src/datadog/trace_id.cpp src/datadog/trace_sampler_config.cpp @@ -165,6 +167,7 @@ target_sources(dd_trace_cpp-objects PUBLIC src/datadog/json.hpp src/datadog/limiter.h src/datadog/logger.h + src/datadog/metrics.h src/datadog/msgpack.h src/datadog/null_collector.h src/datadog/optional.h @@ -189,6 +192,7 @@ target_sources(dd_trace_cpp-objects PUBLIC src/datadog/tags.h src/datadog/threaded_event_scheduler.h src/datadog/tracer_config.h + src/datadog/tracer_telemetry.h 
src/datadog/tracer.h src/datadog/trace_id.h src/datadog/trace_sampler_config.h diff --git a/src/datadog/datadog_agent.cpp b/src/datadog/datadog_agent.cpp index 2107500d..dbc74009 100644 --- a/src/datadog/datadog_agent.cpp +++ b/src/datadog/datadog_agent.cpp @@ -22,6 +22,7 @@ namespace tracing { namespace { const StringView traces_api_path = "/v0.4/traces"; +const StringView telemetry_v2_path = "/telemetry/proxy/api/v2/apmtelemetry"; HTTPClient::URL traces_endpoint(const HTTPClient::URL& agent_url) { auto traces_url = agent_url; @@ -29,6 +30,12 @@ HTTPClient::URL traces_endpoint(const HTTPClient::URL& agent_url) { return traces_url; } +HTTPClient::URL telemetry_endpoint(const HTTPClient::URL& agent_url) { + auto telemetry_v2_url = agent_url; + append(telemetry_v2_url.path, telemetry_v2_path); + return telemetry_v2_url; +} + Expected msgpack_encode( std::string& destination, const std::vector& trace_chunks) { @@ -124,18 +131,30 @@ std::variant parse_agent_traces_response( } // namespace -DatadogAgent::DatadogAgent(const FinalizedDatadogAgentConfig& config, - const Clock& clock, - const std::shared_ptr& logger) - : clock_(clock), +DatadogAgent::DatadogAgent( + const FinalizedDatadogAgentConfig& config, + const std::shared_ptr& tracer_telemetry, + const Clock& clock, const std::shared_ptr& logger) + : tracer_telemetry_(tracer_telemetry), + clock_(clock), logger_(logger), traces_endpoint_(traces_endpoint(config.url)), + telemetry_endpoint_(telemetry_endpoint(config.url)), http_client_(config.http_client), event_scheduler_(config.event_scheduler), cancel_scheduled_flush_(event_scheduler_->schedule_recurring_event( config.flush_interval, [this]() { flush(); })), + cancel_heartbeat_timer_(event_scheduler_->schedule_recurring_event( + std::chrono::seconds(10), [this, n=0]() mutable { + n++; + tracer_telemetry_->captureMetrics(); + if (n%6 == 0) { + sendHeartbeatAndTelemetry(); + } + })), flush_interval_(config.flush_interval) { assert(logger_); + sendAppStarted(); } 
DatadogAgent::~DatadogAgent() { @@ -154,7 +173,6 @@ Expected DatadogAgent::send( } nlohmann::json DatadogAgent::config_json() const { - const auto& url = traces_endpoint_; // brevity const auto flush_interval_milliseconds = std::chrono::duration_cast(flush_interval_) .count(); @@ -163,7 +181,8 @@ nlohmann::json DatadogAgent::config_json() const { return nlohmann::json::object({ {"type", "datadog::tracing::DatadogAgent"}, {"config", nlohmann::json::object({ - {"url", (url.scheme + "://" + url.authority + url.path)}, + {"traces_url", (traces_endpoint_.scheme + "://" + traces_endpoint_.authority + traces_endpoint_.path)}, + {"telemetry_url", (telemetry_endpoint_.scheme + "://" + telemetry_endpoint_.authority + telemetry_endpoint_.path)}, {"flush_interval_milliseconds", flush_interval_milliseconds}, {"http_client", http_client_->config_json()}, {"event_scheduler", event_scheduler_->config_json()}, @@ -251,10 +270,11 @@ void DatadogAgent::flush() { // request or retrieving the response. It's invoked // asynchronously. auto on_error = [logger = logger_](Error error) { - logger->log_error( - error.with_prefix("Error occurred during HTTP request: ")); + logger->log_error(error.with_prefix( + "Error occurred during HTTP request for submitting traces: ")); }; + tracer_telemetry_->trace_api_requests().inc(); auto post_result = http_client_->post( traces_endpoint_, std::move(set_request_headers), std::move(body), std::move(on_response), std::move(on_error)); @@ -263,5 +283,87 @@ void DatadogAgent::flush() { } } +void DatadogAgent::sendAppStarted() { + auto payload = tracer_telemetry_->appStarted(); + auto set_request_headers = [&](DictWriter& headers) { + headers.set("Content-Type", "application/json"); + }; + + // Callback for a successful HTTP request, to examine HTTP status. 
+ auto on_response = [logger = logger_](int response_status, + const DictReader& /*response_headers*/, + std::string response_body) { + if (response_status < 200 || response_status >= 300) { + logger->log_error([&](auto& stream) { + stream << "Unexpected telemetry response status " << response_status + << " with body (starts on next line):\n" + << response_body; + }); + return; + } else { + logger->log_error([&](auto& stream) { + stream << "Successful telemetry submission with response status " << response_status + << " and body (starts on next line):\n" + << response_body; + }); + } + + }; + + // Callback for unsuccessful HTTP request. + auto on_error = [logger = logger_](Error error) { + logger->log_error(error.with_prefix( + "Error occurred during HTTP request for telemetry: ")); + }; + + auto post_result = http_client_->post( + telemetry_endpoint_, std::move(set_request_headers), std::move(payload), + std::move(on_response), std::move(on_error)); + if (auto* error = post_result.if_error()) { + logger_->log_error(*error); + } +} + +void DatadogAgent::sendHeartbeatAndTelemetry() { + auto payload = tracer_telemetry_->heartbeatAndTelemetry(); + auto set_request_headers = [&](DictWriter& headers) { + headers.set("Content-Type", "application/json"); + }; + + // Callback for a successful HTTP request, to examine HTTP status. + auto on_response = [logger = logger_](int response_status, + const DictReader& /*response_headers*/, + std::string response_body) { + if (response_status < 200 || response_status >= 300) { + logger->log_error([&](auto& stream) { + stream << "Unexpected telemetry response status " << response_status + << " with body (starts on next line):\n" + << response_body; + }); + return; + } else { + logger->log_error([&](auto& stream) { + stream << "Successful telemetry submission with response status " << response_status + << " and body (starts on next line):\n" + << response_body; + }); + } + + }; + + // Callback for unsuccessful HTTP request. 
+ auto on_error = [logger = logger_](Error error) { + logger->log_error(error.with_prefix( + "Error occurred during HTTP request for telemetry: ")); + }; + + auto post_result = http_client_->post( + telemetry_endpoint_, std::move(set_request_headers), std::move(payload), + std::move(on_response), std::move(on_error)); + if (auto* error = post_result.if_error()) { + logger_->log_error(*error); + } +} + } // namespace tracing } // namespace datadog diff --git a/src/datadog/datadog_agent.h b/src/datadog/datadog_agent.h index de7aba54..4da4b319 100644 --- a/src/datadog/datadog_agent.h +++ b/src/datadog/datadog_agent.h @@ -14,6 +14,7 @@ #include "collector.h" #include "event_scheduler.h" #include "http_client.h" +#include "tracer_telemetry.h" namespace datadog { namespace tracing { @@ -32,25 +33,31 @@ class DatadogAgent : public Collector { private: std::mutex mutex_; + std::shared_ptr tracer_telemetry_; Clock clock_; std::shared_ptr logger_; std::vector trace_chunks_; HTTPClient::URL traces_endpoint_; + HTTPClient::URL telemetry_endpoint_; std::shared_ptr http_client_; std::shared_ptr event_scheduler_; EventScheduler::Cancel cancel_scheduled_flush_; + EventScheduler::Cancel cancel_heartbeat_timer_; std::chrono::steady_clock::duration flush_interval_; void flush(); public: - DatadogAgent(const FinalizedDatadogAgentConfig&, const Clock& clock, + DatadogAgent(const FinalizedDatadogAgentConfig&, + const std::shared_ptr&, const Clock& clock, const std::shared_ptr&); ~DatadogAgent(); Expected send( std::vector>&& spans, const std::shared_ptr& response_handler) override; + void sendAppStarted(); + void sendHeartbeatAndTelemetry(); nlohmann::json config_json() const override; }; diff --git a/src/datadog/metrics.cpp b/src/datadog/metrics.cpp new file mode 100644 index 00000000..4d4c396b --- /dev/null +++ b/src/datadog/metrics.cpp @@ -0,0 +1,32 @@ +#include "metrics.h" + +#include "json.hpp" + +namespace datadog { +namespace tracing { + +Metric::Metric(std::string name, 
std::string type, bool common) : name_(name), type_(type), common_(common) {} +std::string Metric::name() { return name_; } +std::string Metric::type() { return type_; } +bool Metric::common() { return common_; } +uint64_t Metric::value() { return value_; } + +CounterMetric::CounterMetric(std::string name, bool common) : Metric(name, "count", common) {} +void CounterMetric::inc() { add(1); } +void CounterMetric::add(uint64_t amount) { value_ += amount; } + +GaugeMetric::GaugeMetric(std::string name, bool common) : Metric(name, "gauge", common) {} +void GaugeMetric::set(uint64_t value) { value_ = value; } +void GaugeMetric::inc() { add(1); } +void GaugeMetric::add(uint64_t amount) { value_ += amount; } +void GaugeMetric::dec() { sub(1); } +void GaugeMetric::sub(uint64_t amount) { + if (amount > value_) { + value_ = 0; + } else { + value_ -= amount; + } +} + +} // namespace tracing +} // namespace datadog diff --git a/src/datadog/metrics.h b/src/datadog/metrics.h new file mode 100644 index 00000000..75c48f7a --- /dev/null +++ b/src/datadog/metrics.h @@ -0,0 +1,43 @@ +#pragma once + +#include + +#include "json_fwd.hpp" +#include "string_view.h" + +namespace datadog { +namespace tracing { + +class Metric { + std::string name_; + std::string type_; + bool common_; + protected: + std::atomic value_ = 0; + Metric(std::string name, std::string type, bool common); + public: + std::string name(); + std::string type(); + bool common(); + uint64_t value(); +}; + +class CounterMetric : public Metric { + public: + CounterMetric(std::string name, bool common); + void inc(); + void add(uint64_t amount); +}; + +class GaugeMetric : public Metric { + public: + GaugeMetric(std::string name, bool common); + void set(uint64_t value); + void inc(); + void add(uint64_t amount); + void dec(); + void sub(uint64_t amount); +}; + +} // namespace tracing +} // namespace datadog diff --git a/src/datadog/trace_segment.cpp b/src/datadog/trace_segment.cpp index f5279e71..9b92a69d 100644 --- 
a/src/datadog/trace_segment.cpp +++ b/src/datadog/trace_segment.cpp @@ -83,6 +83,7 @@ void inject_trace_tags( TraceSegment::TraceSegment( const std::shared_ptr& logger, const std::shared_ptr& collector, + const std::shared_ptr& tracer_telemetry, const std::shared_ptr& trace_sampler, const std::shared_ptr& span_sampler, const std::shared_ptr& defaults, @@ -96,6 +97,7 @@ TraceSegment::TraceSegment( std::unique_ptr local_root) : logger_(logger), collector_(collector), + tracer_telemetry_(tracer_telemetry), trace_sampler_(trace_sampler), span_sampler_(span_sampler), defaults_(defaults), @@ -111,10 +113,14 @@ TraceSegment::TraceSegment( std::move(additional_datadog_w3c_tracestate)) { assert(logger_); assert(collector_); + assert(tracer_telemetry_); assert(trace_sampler_); assert(span_sampler_); assert(defaults_); + tracer_telemetry_->traces_started().inc(); + tracer_telemetry_->active_traces().inc(); + register_span(std::move(local_root)); } @@ -135,12 +141,18 @@ Optional TraceSegment::sampling_decision() const { Logger& TraceSegment::logger() const { return *logger_; } void TraceSegment::register_span(std::unique_ptr span) { + tracer_telemetry_->spans_started().inc(); + tracer_telemetry_->active_spans().inc(); + std::lock_guard lock(mutex_); assert(spans_.empty() || num_finished_spans_ < spans_.size()); spans_.emplace_back(std::move(span)); } void TraceSegment::span_finished() { + tracer_telemetry_->spans_finished().inc(); + tracer_telemetry_->active_spans().dec(); + { std::lock_guard lock(mutex_); ++num_finished_spans_; @@ -220,6 +232,9 @@ void TraceSegment::span_finished() { logger_->log_error( error->with_prefix("Error sending spans to collector: ")); } + + tracer_telemetry_->traces_finished().inc(); + tracer_telemetry_->active_traces().dec(); } void TraceSegment::override_sampling_priority(int priority) { diff --git a/src/datadog/trace_segment.h b/src/datadog/trace_segment.h index f3d0c05f..0e233447 100644 --- a/src/datadog/trace_segment.h +++ 
b/src/datadog/trace_segment.h @@ -36,6 +36,7 @@ #include "optional.h" #include "propagation_style.h" #include "sampling_decision.h" +#include "tracer_telemetry.h" namespace datadog { namespace tracing { @@ -54,6 +55,7 @@ class TraceSegment { std::shared_ptr logger_; std::shared_ptr collector_; + std::shared_ptr tracer_telemetry_; std::shared_ptr trace_sampler_; std::shared_ptr span_sampler_; @@ -74,6 +76,7 @@ class TraceSegment { public: TraceSegment(const std::shared_ptr& logger, const std::shared_ptr& collector, + const std::shared_ptr& tracer_telemetry, const std::shared_ptr& trace_sampler, const std::shared_ptr& span_sampler, const std::shared_ptr& defaults, diff --git a/src/datadog/tracer.cpp b/src/datadog/tracer.cpp index 602d8ac0..e94cb535 100644 --- a/src/datadog/tracer.cpp +++ b/src/datadog/tracer.cpp @@ -243,6 +243,7 @@ Tracer::Tracer(const FinalizedTracerConfig& config, const Clock& clock) : logger_(config.logger), collector_(/* see constructor body */), + tracer_telemetry_(std::make_shared(clock, config)), trace_sampler_( std::make_shared(config.trace_sampler, clock)), span_sampler_(std::make_shared(config.span_sampler, clock)), @@ -259,8 +260,8 @@ Tracer::Tracer(const FinalizedTracerConfig& config, } else { auto& agent_config = std::get(config.collector); - collector_ = - std::make_shared(agent_config, clock, config.logger); + collector_ = std::make_shared(agent_config, tracer_telemetry_, + clock, config.logger); } if (config.log_on_startup) { @@ -293,10 +294,10 @@ Span Tracer::create_span(const SpanConfig& config) { const auto span_data_ptr = span_data.get(); const auto segment = std::make_shared( - logger_, collector_, trace_sampler_, span_sampler_, defaults_, - injection_styles_, hostname_, nullopt /* origin */, tags_header_max_size_, - std::move(trace_tags), nullopt /* sampling_decision */, - nullopt /* additional_w3c_tracestate */, + logger_, collector_, tracer_telemetry_, trace_sampler_, span_sampler_, + defaults_, injection_styles_, hostname_, 
nullopt /* origin */, + tags_header_max_size_, std::move(trace_tags), + nullopt /* sampling_decision */, nullopt /* additional_w3c_tracestate */, nullopt /* additional_datadog_w3c_tracestate*/, std::move(span_data)); Span span{span_data_ptr, segment, [generator = generator_]() { return generator->span_id(); }, @@ -456,10 +457,10 @@ Expected Tracer::extract_span(const DictReader& reader, const auto span_data_ptr = span_data.get(); const auto segment = std::make_shared( - logger_, collector_, trace_sampler_, span_sampler_, defaults_, - injection_styles_, hostname_, std::move(origin), tags_header_max_size_, - std::move(trace_tags), std::move(sampling_decision), - std::move(additional_w3c_tracestate), + logger_, collector_, tracer_telemetry_, trace_sampler_, span_sampler_, + defaults_, injection_styles_, hostname_, std::move(origin), + tags_header_max_size_, std::move(trace_tags), + std::move(sampling_decision), std::move(additional_w3c_tracestate), std::move(additional_datadog_w3c_tracestate), std::move(span_data)); Span span{span_data_ptr, segment, [generator = generator_]() { return generator->span_id(); }, diff --git a/src/datadog/tracer.h b/src/datadog/tracer.h index e2c54f85..a473daf5 100644 --- a/src/datadog/tracer.h +++ b/src/datadog/tracer.h @@ -18,6 +18,7 @@ #include "optional.h" #include "span.h" #include "tracer_config.h" +#include "tracer_telemetry.h" namespace datadog { namespace tracing { @@ -30,6 +31,7 @@ class SpanSampler; class Tracer { std::shared_ptr logger_; std::shared_ptr collector_; + std::shared_ptr tracer_telemetry_; std::shared_ptr trace_sampler_; std::shared_ptr span_sampler_; std::shared_ptr generator_; diff --git a/src/datadog/tracer_config.h b/src/datadog/tracer_config.h index 275a0202..7382d87f 100644 --- a/src/datadog/tracer_config.h +++ b/src/datadog/tracer_config.h @@ -13,6 +13,7 @@ #include "error.h" #include "expected.h" #include "propagation_style.h" +#include "random.h" #include "span_defaults.h" #include "span_sampler_config.h" 
#include "trace_sampler_config.h" @@ -129,6 +130,8 @@ class FinalizedTracerConfig { std::shared_ptr logger; bool log_on_startup; bool trace_id_128_bit; + + std::string runtime_id = uuid(); }; // Return a `FinalizedTracerConfig` from the specified `config` and from any diff --git a/src/datadog/tracer_telemetry.cpp b/src/datadog/tracer_telemetry.cpp new file mode 100644 index 00000000..fee33a73 --- /dev/null +++ b/src/datadog/tracer_telemetry.cpp @@ -0,0 +1,149 @@ +#include "tracer_telemetry.h" + +#include + +#include "json.hpp" +#include "logger.h" +#include "platform_util.h" +#include "version.h" + +namespace datadog { +namespace tracing { + +TracerTelemetry::TracerTelemetry(const Clock& clock, + const FinalizedTracerConfig& config) + : clock_(clock), config_(config) { + /* + metrics_.emplace_back(traces_started_, MetricSnapshot{}); + metrics_.emplace_back(traces_finished_, MetricSnapshot{}); + metrics_.emplace_back(active_traces_, MetricSnapshot{}); + */ + metrics_.emplace_back(trace_api_requests_, MetricSnapshot{}); + } + + +std::string TracerTelemetry::appStarted() { + time_t tracer_time = std::chrono::duration_cast( + clock_().wall.time_since_epoch()) + .count(); + std::string hostname = get_hostname().value_or("hostname-unavailable"); + config_.logger->log_error([&](auto& stream) { + stream << "app-started: hostname=" << hostname << " seq_id=" << seq_id; + }); + + seq_id++; + auto payload = + nlohmann::json::object({ + {"api_version", "v2"}, + {"seq_id", seq_id}, + {"request_type", "app-started"}, + {"tracer_time", tracer_time}, + {"runtime_id", "0524398a-11e2-4375-a637-619eb9148e8f"}, + {"debug", true}, + {"application", + nlohmann::json::object({ + {"service_name", config_.defaults.service}, + {"env", config_.defaults.environment}, + {"tracer_version", tracer_version_string}, + {"language_name", "cpp"}, + {"language_version", std::to_string(__cplusplus)}, + })}, + // TODO: host information (hostname, os, os_version, kernel, etc) + {"host", 
nlohmann::json::object({ + {"hostname", hostname}, + })}, + {"payload", + nlohmann::json::object({ + {"configuration", nlohmann::json::array({ + // TODO: environment variables or + // finalized config details + })}, + + })}, + }) + .dump(); + + return payload; +} + +void TracerTelemetry::captureMetrics() { + time_t timepoint = std::chrono::duration_cast( + clock_().wall.time_since_epoch()) + .count(); + for (auto& m : metrics_) { + m.second.emplace_back(timepoint, m.first.get().value()); + } + + for (auto& m : metrics_) { + std::cout << "metrics: " << m.first.get().name() << std::endl; + for (auto& v : m.second) { + std::cout << v.first << " " << v.second << std::endl; + } + } +} + +std::string TracerTelemetry::heartbeatAndTelemetry() { + time_t tracer_time = std::chrono::duration_cast( + clock_().wall.time_since_epoch()) + .count(); + std::string hostname = get_hostname().value_or("hostname-unavailable"); + + auto heartbeat = nlohmann::json::object({ + {"request_type", "app-heartbeat"}, + }); + + auto metrics = nlohmann::json::array(); + for (auto& m : metrics_) { + auto& metric = m.first.get(); + auto& points = m.second; + metrics.emplace_back(nlohmann::json::object({ + {"metric", metric.name()}, + {"type", metric.type()}, + {"interval", 60}, + {"points", points}, + {"common", metric.common()}, + })); + m.second.clear(); + } + + auto generate_metrics = nlohmann::json::object({ + {"request_type", "generate-metrics"}, + {"payload", nlohmann::json::object({ + {"namespace", "tracers"}, + {"series", metrics}, + })}, + }); + + seq_id++; + auto payload = nlohmann::json::object({ + {"api_version", "v2"}, + {"seq_id", seq_id}, + {"request_type", "message-batch"}, + {"tracer_time", tracer_time}, + {"runtime_id", "0524398a-11e2-4375-a637-619eb9148e8f"}, + {"debug", true}, + {"application", + nlohmann::json::object({ + {"service_name", config_.defaults.service}, + {"env", config_.defaults.environment}, + {"tracer_version", tracer_version_string}, + {"language_name", "cpp"}, 
+ {"language_version", std::to_string(__cplusplus)}, + })}, + // TODO: host information (hostname, os, os_version, kernel, etc) + {"host", nlohmann::json::object({ + {"hostname", hostname}, + })}, + {"payload", nlohmann::json::array({ + heartbeat, + generate_metrics, + })}, + }).dump(); + config_.logger->log_error([&](auto& stream) { + stream << "telemetry payload: " << payload; + }); + return payload; +} + +} // namespace tracing +} // namespace datadog diff --git a/src/datadog/tracer_telemetry.h b/src/datadog/tracer_telemetry.h new file mode 100644 index 00000000..cab7d8c1 --- /dev/null +++ b/src/datadog/tracer_telemetry.h @@ -0,0 +1,47 @@ +#pragma once +#include + +#include "clock.h" +#include "metrics.h" +#include "tracer_config.h" + +namespace datadog { +namespace tracing { +class TracerTelemetry { + Clock clock_; + FinalizedTracerConfig config_; + uint64_t seq_id = 0; + std::vector, std::vector>>> counter_metrics_; + std::vector, std::vector>>> gauge_metrics_; + using MetricSnapshot = std::vector>; + std::vector, MetricSnapshot>> metrics_; + + CounterMetric traces_started_ = {"traces_started", true}; + CounterMetric traces_finished_ = {"traces_finished", true}; + GaugeMetric active_traces_ = {"active_traces", true}; + + CounterMetric spans_started_ = {"spans_started", true}; + CounterMetric spans_finished_ = {"spans_finished", true}; + GaugeMetric active_spans_ = {"active_spans", true}; + + CounterMetric trace_api_requests_ = {"trace_api.requests", true}; + + public: + TracerTelemetry(const Clock& clock, const FinalizedTracerConfig& config); + std::string appStarted(); + void captureMetrics(); + std::string heartbeatAndTelemetry(); + + CounterMetric& traces_started() { return traces_started_; }; + CounterMetric& traces_finished() { return traces_finished_; }; + GaugeMetric& active_traces() { return active_traces_; }; + + CounterMetric& spans_started() { return spans_started_; }; + CounterMetric& spans_finished() { return spans_finished_; }; + GaugeMetric& 
active_spans() { return active_spans_; }; + + CounterMetric& trace_api_requests() { return trace_api_requests_; }; +}; + +} // namespace tracing +} // namespace datadog From 890490dfb53c32a8dd457572cfced4e72f5fd385 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Fri, 29 Sep 2023 09:10:58 +0000 Subject: [PATCH 02/39] Redesign to support tagged metrics --- src/datadog/datadog_agent.cpp | 71 ++++++++------ src/datadog/datadog_agent.h | 3 +- src/datadog/metrics.cpp | 17 +++- src/datadog/metrics.h | 28 +++--- src/datadog/trace_segment.cpp | 13 +-- src/datadog/trace_segment.h | 1 + src/datadog/tracer.cpp | 2 + src/datadog/tracer_telemetry.cpp | 158 ++++++++++++++++++------------- src/datadog/tracer_telemetry.h | 64 ++++++++----- 9 files changed, 211 insertions(+), 146 deletions(-) diff --git a/src/datadog/datadog_agent.cpp b/src/datadog/datadog_agent.cpp index dbc74009..eaad31b1 100644 --- a/src/datadog/datadog_agent.cpp +++ b/src/datadog/datadog_agent.cpp @@ -144,16 +144,18 @@ DatadogAgent::DatadogAgent( event_scheduler_(config.event_scheduler), cancel_scheduled_flush_(event_scheduler_->schedule_recurring_event( config.flush_interval, [this]() { flush(); })), - cancel_heartbeat_timer_(event_scheduler_->schedule_recurring_event( - std::chrono::seconds(10), [this, n=0]() mutable { + cancel_telemetry_timer_(event_scheduler_->schedule_recurring_event( + std::chrono::seconds(10), + [this, n = 0]() mutable { n++; - tracer_telemetry_->captureMetrics(); - if (n%6 == 0) { - sendHeartbeatAndTelemetry(); + tracer_telemetry_->capture_metrics(); + if (n % 6 == 0) { + sendHeartbeatAndTelemetry(); } - })), + })), flush_interval_(config.flush_interval) { assert(logger_); + assert(tracer_telemetry_); sendAppStarted(); } @@ -161,6 +163,7 @@ DatadogAgent::~DatadogAgent() { const auto deadline = clock_().tick + std::chrono::seconds(2); cancel_scheduled_flush_(); flush(); + cancel_telemetry_timer_(); http_client_->drain(deadline); } @@ -230,10 +233,21 @@ void DatadogAgent::flush() { // 
This is the callback for the HTTP response. It's invoked // asynchronously. - auto on_response = [samplers = std::move(response_handlers), + auto on_response = [this, samplers = std::move(response_handlers), logger = logger_](int response_status, const DictReader& /*response_headers*/, std::string response_body) { + if (response_status >= 500) { + tracer_telemetry_->metrics().trace_api.responses_5xx.inc(); + } else if (response_status >= 400) { + tracer_telemetry_->metrics().trace_api.responses_4xx.inc(); + } else if (response_status >= 300) { + tracer_telemetry_->metrics().trace_api.responses_3xx.inc(); + } else if (response_status >= 200) { + tracer_telemetry_->metrics().trace_api.responses_2xx.inc(); + } else if (response_status >= 100) { + tracer_telemetry_->metrics().trace_api.responses_1xx.inc(); + } if (response_status != 200) { logger->log_error([&](auto& stream) { stream << "Unexpected response status " << response_status @@ -269,12 +283,13 @@ void DatadogAgent::flush() { // This is the callback for if something goes wrong sending the // request or retrieving the response. It's invoked // asynchronously. 
- auto on_error = [logger = logger_](Error error) { + auto on_error = [this, logger = logger_](Error error) { + tracer_telemetry_->metrics().trace_api.errors_network.inc(); logger->log_error(error.with_prefix( "Error occurred during HTTP request for submitting traces: ")); }; - tracer_telemetry_->trace_api_requests().inc(); + tracer_telemetry_->metrics().trace_api.requests.inc(); auto post_result = http_client_->post( traces_endpoint_, std::move(set_request_headers), std::move(body), std::move(on_response), std::move(on_error)); @@ -284,7 +299,7 @@ void DatadogAgent::flush() { } void DatadogAgent::sendAppStarted() { - auto payload = tracer_telemetry_->appStarted(); + auto payload = tracer_telemetry_->app_started(); auto set_request_headers = [&](DictWriter& headers) { headers.set("Content-Type", "application/json"); }; @@ -295,19 +310,18 @@ void DatadogAgent::sendAppStarted() { std::string response_body) { if (response_status < 200 || response_status >= 300) { logger->log_error([&](auto& stream) { - stream << "Unexpected telemetry response status " << response_status - << " with body (starts on next line):\n" - << response_body; - }); + stream << "Unexpected telemetry response status " << response_status + << " with body (starts on next line):\n" + << response_body; + }); return; } else { logger->log_error([&](auto& stream) { - stream << "Successful telemetry submission with response status " << response_status - << " and body (starts on next line):\n" - << response_body; - }); + stream << "Successful telemetry submission with response status " + << response_status << " and body (starts on next line):\n" + << response_body; + }); } - }; // Callback for unsuccessful HTTP request. 
@@ -325,7 +339,7 @@ void DatadogAgent::sendAppStarted() { } void DatadogAgent::sendHeartbeatAndTelemetry() { - auto payload = tracer_telemetry_->heartbeatAndTelemetry(); + auto payload = tracer_telemetry_->heartbeat_and_telemetry(); auto set_request_headers = [&](DictWriter& headers) { headers.set("Content-Type", "application/json"); }; @@ -336,19 +350,18 @@ void DatadogAgent::sendHeartbeatAndTelemetry() { std::string response_body) { if (response_status < 200 || response_status >= 300) { logger->log_error([&](auto& stream) { - stream << "Unexpected telemetry response status " << response_status - << " with body (starts on next line):\n" - << response_body; - }); + stream << "Unexpected telemetry response status " << response_status + << " with body (starts on next line):\n" + << response_body; + }); return; } else { logger->log_error([&](auto& stream) { - stream << "Successful telemetry submission with response status " << response_status - << " and body (starts on next line):\n" - << response_body; - }); + stream << "Successful telemetry submission with response status " + << response_status << " and body (starts on next line):\n" + << response_body; + }); } - }; // Callback for unsuccessful HTTP request. 
diff --git a/src/datadog/datadog_agent.h b/src/datadog/datadog_agent.h index 4da4b319..bd33e27c 100644 --- a/src/datadog/datadog_agent.h +++ b/src/datadog/datadog_agent.h @@ -14,6 +14,7 @@ #include "collector.h" #include "event_scheduler.h" #include "http_client.h" +#include "metrics.h" #include "tracer_telemetry.h" namespace datadog { @@ -42,7 +43,7 @@ class DatadogAgent : public Collector { std::shared_ptr http_client_; std::shared_ptr event_scheduler_; EventScheduler::Cancel cancel_scheduled_flush_; - EventScheduler::Cancel cancel_heartbeat_timer_; + EventScheduler::Cancel cancel_telemetry_timer_; std::chrono::steady_clock::duration flush_interval_; void flush(); diff --git a/src/datadog/metrics.cpp b/src/datadog/metrics.cpp index 4d4c396b..8461eecd 100644 --- a/src/datadog/metrics.cpp +++ b/src/datadog/metrics.cpp @@ -5,17 +5,24 @@ namespace datadog { namespace tracing { -Metric::Metric(std::string name, std::string type, bool common) : name_(name), type_(type), common_(common) {} -std::string Metric::name() { return name_; } -std::string Metric::type() { return type_; } +Metric::Metric(const std::string name, std::string type, + const std::vector tags, bool common) + : name_(name), type_(type), tags_(tags), common_(common) {} +const std::string Metric::name() { return name_; } +const std::string Metric::type() { return type_; } +const std::vector Metric::tags() { return tags_; } bool Metric::common() { return common_; } uint64_t Metric::value() { return value_; } -CounterMetric::CounterMetric(std::string name, bool common) : Metric(name, "count", common) {} +CounterMetric::CounterMetric(const std::string name, + const std::vector tags, bool common) + : Metric(name, "count", tags, common) {} void CounterMetric::inc() { add(1); } void CounterMetric::add(uint64_t amount) { value_ += amount; } -GaugeMetric::GaugeMetric(std::string name, bool common) : Metric(name, "gauge", common) {} +GaugeMetric::GaugeMetric(const std::string name, + const std::vector tags, bool 
common) + : Metric(name, "gauge", tags, common) {} void GaugeMetric::set(uint64_t value) { value_ = value; } void GaugeMetric::inc() { add(1); } void GaugeMetric::add(uint64_t amount) { value_ += amount; } diff --git a/src/datadog/metrics.h b/src/datadog/metrics.h index 75c48f7a..23dafe2e 100644 --- a/src/datadog/metrics.h +++ b/src/datadog/metrics.h @@ -1,37 +1,43 @@ #pragma once #include - -#include "json_fwd.hpp" -#include "string_view.h" +#include +#include namespace datadog { namespace tracing { class Metric { - std::string name_; - std::string type_; + const std::string name_; + const std::string type_; + const std::vector tags_; bool common_; - protected: + + protected: std::atomic value_ = 0; - Metric(std::string name, std::string type, bool common); + Metric(const std::string name, std::string type, + const std::vector tags, bool common); + public: - std::string name(); - std::string type(); + const std::string name(); + const std::string type(); + const std::vector tags(); bool common(); uint64_t value(); }; class CounterMetric : public Metric { public: - CounterMetric(std::string name, bool common); + CounterMetric(const std::string name, const std::vector tags, + bool common); void inc(); void add(uint64_t amount); }; class GaugeMetric : public Metric { public: - GaugeMetric(std::string name, bool common); + GaugeMetric(const std::string name, const std::vector tags, + bool common); void set(uint64_t value); void inc(); void add(uint64_t amount); diff --git a/src/datadog/trace_segment.cpp b/src/datadog/trace_segment.cpp index 9b92a69d..66ff362d 100644 --- a/src/datadog/trace_segment.cpp +++ b/src/datadog/trace_segment.cpp @@ -118,9 +118,6 @@ TraceSegment::TraceSegment( assert(span_sampler_); assert(defaults_); - tracer_telemetry_->traces_started().inc(); - tracer_telemetry_->active_traces().inc(); - register_span(std::move(local_root)); } @@ -141,8 +138,7 @@ Optional TraceSegment::sampling_decision() const { Logger& TraceSegment::logger() const { return 
*logger_; } void TraceSegment::register_span(std::unique_ptr span) { - tracer_telemetry_->spans_started().inc(); - tracer_telemetry_->active_spans().inc(); + tracer_telemetry_->metrics().tracer.spans_created.inc(); std::lock_guard lock(mutex_); assert(spans_.empty() || num_finished_spans_ < spans_.size()); @@ -150,10 +146,8 @@ void TraceSegment::register_span(std::unique_ptr span) { } void TraceSegment::span_finished() { - tracer_telemetry_->spans_finished().inc(); - tracer_telemetry_->active_spans().dec(); - { + tracer_telemetry_->metrics().tracer.spans_finished.inc(); std::lock_guard lock(mutex_); ++num_finished_spans_; assert(num_finished_spans_ <= spans_.size()); @@ -233,8 +227,7 @@ void TraceSegment::span_finished() { error->with_prefix("Error sending spans to collector: ")); } - tracer_telemetry_->traces_finished().inc(); - tracer_telemetry_->active_traces().dec(); + tracer_telemetry_->metrics().tracer.trace_segments_closed.inc(); } void TraceSegment::override_sampling_priority(int priority) { diff --git a/src/datadog/trace_segment.h b/src/datadog/trace_segment.h index 0e233447..d96bef22 100644 --- a/src/datadog/trace_segment.h +++ b/src/datadog/trace_segment.h @@ -33,6 +33,7 @@ #include #include "expected.h" +#include "metrics.h" #include "optional.h" #include "propagation_style.h" #include "sampling_decision.h" diff --git a/src/datadog/tracer.cpp b/src/datadog/tracer.cpp index e94cb535..b0c1e96b 100644 --- a/src/datadog/tracer.cpp +++ b/src/datadog/tracer.cpp @@ -293,6 +293,7 @@ Span Tracer::create_span(const SpanConfig& config) { span_data->parent_id = 0; const auto span_data_ptr = span_data.get(); + tracer_telemetry_->metrics().tracer.trace_segments_created_new.inc(); const auto segment = std::make_shared( logger_, collector_, tracer_telemetry_, trace_sampler_, span_sampler_, defaults_, injection_styles_, hostname_, nullopt /* origin */, @@ -456,6 +457,7 @@ Expected Tracer::extract_span(const DictReader& reader, } const auto span_data_ptr = 
span_data.get(); + tracer_telemetry_->metrics().tracer.trace_segments_created_continued.inc(); const auto segment = std::make_shared( logger_, collector_, tracer_telemetry_, trace_sampler_, span_sampler_, defaults_, injection_styles_, hostname_, std::move(origin), diff --git a/src/datadog/tracer_telemetry.cpp b/src/datadog/tracer_telemetry.cpp index fee33a73..90a49936 100644 --- a/src/datadog/tracer_telemetry.cpp +++ b/src/datadog/tracer_telemetry.cpp @@ -13,27 +13,49 @@ namespace tracing { TracerTelemetry::TracerTelemetry(const Clock& clock, const FinalizedTracerConfig& config) : clock_(clock), config_(config) { - /* - metrics_.emplace_back(traces_started_, MetricSnapshot{}); - metrics_.emplace_back(traces_finished_, MetricSnapshot{}); - metrics_.emplace_back(active_traces_, MetricSnapshot{}); - */ - metrics_.emplace_back(trace_api_requests_, MetricSnapshot{}); - } - + metrics_snapshots_.emplace_back(metrics_.tracer.spans_created, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.tracer.spans_finished, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.tracer.trace_segments_created_new, + MetricSnapshot{}); + metrics_snapshots_.emplace_back( + metrics_.tracer.trace_segments_created_continued, MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.tracer.trace_segments_closed, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.trace_api.requests, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.trace_api.responses_1xx, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.trace_api.responses_2xx, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.trace_api.responses_3xx, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.trace_api.responses_4xx, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.trace_api.responses_5xx, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.trace_api.errors_timeout, + MetricSnapshot{}); + 
metrics_snapshots_.emplace_back(metrics_.trace_api.errors_network, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.trace_api.errors_status_code, + MetricSnapshot{}); +} -std::string TracerTelemetry::appStarted() { +std::string TracerTelemetry::app_started() { time_t tracer_time = std::chrono::duration_cast( - clock_().wall.time_since_epoch()) - .count(); + clock_().wall.time_since_epoch()) + .count(); std::string hostname = get_hostname().value_or("hostname-unavailable"); config_.logger->log_error([&](auto& stream) { - stream << "app-started: hostname=" << hostname << " seq_id=" << seq_id; - }); - + stream << "app-started: hostname=" << hostname << " seq_id=" << seq_id; + }); + seq_id++; auto payload = - nlohmann::json::object({ + nlohmann::json::object( + { {"api_version", "v2"}, {"seq_id", seq_id}, {"request_type", "app-started"}, @@ -47,11 +69,11 @@ std::string TracerTelemetry::appStarted() { {"tracer_version", tracer_version_string}, {"language_name", "cpp"}, {"language_version", std::to_string(__cplusplus)}, - })}, + })}, // TODO: host information (hostname, os, os_version, kernel, etc) {"host", nlohmann::json::object({ - {"hostname", hostname}, - })}, + {"hostname", hostname}, + })}, {"payload", nlohmann::json::object({ {"configuration", nlohmann::json::array({ @@ -59,22 +81,22 @@ std::string TracerTelemetry::appStarted() { // finalized config details })}, - })}, + })}, }) .dump(); return payload; } -void TracerTelemetry::captureMetrics() { +void TracerTelemetry::capture_metrics() { time_t timepoint = std::chrono::duration_cast( - clock_().wall.time_since_epoch()) - .count(); - for (auto& m : metrics_) { + clock_().wall.time_since_epoch()) + .count(); + for (auto& m : metrics_snapshots_) { m.second.emplace_back(timepoint, m.first.get().value()); } - for (auto& m : metrics_) { + for (auto& m : metrics_snapshots_) { std::cout << "metrics: " << m.first.get().name() << std::endl; for (auto& v : m.second) { std::cout << v.first << " " << v.second 
<< std::endl; @@ -82,66 +104,68 @@ void TracerTelemetry::captureMetrics() { } } -std::string TracerTelemetry::heartbeatAndTelemetry() { +std::string TracerTelemetry::heartbeat_and_telemetry() { time_t tracer_time = std::chrono::duration_cast( - clock_().wall.time_since_epoch()) - .count(); + clock_().wall.time_since_epoch()) + .count(); std::string hostname = get_hostname().value_or("hostname-unavailable"); auto heartbeat = nlohmann::json::object({ {"request_type", "app-heartbeat"}, - }); + }); auto metrics = nlohmann::json::array(); - for (auto& m : metrics_) { + for (auto& m : metrics_snapshots_) { auto& metric = m.first.get(); auto& points = m.second; metrics.emplace_back(nlohmann::json::object({ - {"metric", metric.name()}, - {"type", metric.type()}, - {"interval", 60}, - {"points", points}, - {"common", metric.common()}, - })); + {"metric", metric.name()}, + {"type", metric.type()}, + {"interval", 60}, + {"points", points}, + {"common", metric.common()}, + })); m.second.clear(); } auto generate_metrics = nlohmann::json::object({ {"request_type", "generate-metrics"}, {"payload", nlohmann::json::object({ - {"namespace", "tracers"}, - {"series", metrics}, - })}, - }); - + {"namespace", "tracers"}, + {"series", metrics}, + })}, + }); + seq_id++; - auto payload = nlohmann::json::object({ - {"api_version", "v2"}, - {"seq_id", seq_id}, - {"request_type", "message-batch"}, - {"tracer_time", tracer_time}, - {"runtime_id", "0524398a-11e2-4375-a637-619eb9148e8f"}, - {"debug", true}, - {"application", - nlohmann::json::object({ - {"service_name", config_.defaults.service}, - {"env", config_.defaults.environment}, - {"tracer_version", tracer_version_string}, - {"language_name", "cpp"}, - {"language_version", std::to_string(__cplusplus)}, - })}, - // TODO: host information (hostname, os, os_version, kernel, etc) - {"host", nlohmann::json::object({ - {"hostname", hostname}, - })}, - {"payload", nlohmann::json::array({ - heartbeat, - generate_metrics, - })}, - }).dump(); - 
config_.logger->log_error([&](auto& stream) { - stream << "telemetry payload: " << payload; - }); + auto payload = + nlohmann::json::object( + { + {"api_version", "v2"}, + {"seq_id", seq_id}, + {"request_type", "message-batch"}, + {"tracer_time", tracer_time}, + {"runtime_id", "0524398a-11e2-4375-a637-619eb9148e8f"}, + {"debug", true}, + {"application", + nlohmann::json::object({ + {"service_name", config_.defaults.service}, + {"env", config_.defaults.environment}, + {"tracer_version", tracer_version_string}, + {"language_name", "cpp"}, + {"language_version", std::to_string(__cplusplus)}, + })}, + // TODO: host information (hostname, os, os_version, kernel, etc) + {"host", nlohmann::json::object({ + {"hostname", hostname}, + })}, + {"payload", nlohmann::json::array({ + heartbeat, + generate_metrics, + })}, + }) + .dump(); + config_.logger->log_error( + [&](auto& stream) { stream << "telemetry payload: " << payload; }); return payload; } diff --git a/src/datadog/tracer_telemetry.h b/src/datadog/tracer_telemetry.h index cab7d8c1..5453b661 100644 --- a/src/datadog/tracer_telemetry.h +++ b/src/datadog/tracer_telemetry.h @@ -11,36 +11,54 @@ class TracerTelemetry { Clock clock_; FinalizedTracerConfig config_; uint64_t seq_id = 0; - std::vector, std::vector>>> counter_metrics_; - std::vector, std::vector>>> gauge_metrics_; using MetricSnapshot = std::vector>; - std::vector, MetricSnapshot>> metrics_; + std::vector, MetricSnapshot>> + metrics_snapshots_; - CounterMetric traces_started_ = {"traces_started", true}; - CounterMetric traces_finished_ = {"traces_finished", true}; - GaugeMetric active_traces_ = {"active_traces", true}; + struct { + struct { + CounterMetric spans_created = { + "spans_created", {"integration_name:datadog"}, true}; + CounterMetric spans_finished = { + "spans_finished", {"integration_name:datadog"}, true}; - CounterMetric spans_started_ = {"spans_started", true}; - CounterMetric spans_finished_ = {"spans_finished", true}; - GaugeMetric active_spans_ 
= {"active_spans", true}; + CounterMetric trace_segments_created_new = { + "trace_segments_created", {"new_continued:new"}, true}; + CounterMetric trace_segments_created_continued = { + "trace_segments_created", {"new_continued:continued"}, true}; + CounterMetric trace_segments_closed = { + "trace_segments_closed", {"integration_name:datadog"}, true}; + } tracer; + struct { + CounterMetric requests = {"trace_api.requests", {}, true}; - CounterMetric trace_api_requests_ = {"trace_api.requests", true}; + CounterMetric responses_1xx = { + "trace_api.responses", {"status_code:1xx"}, true}; + CounterMetric responses_2xx = { + "trace_api.responses", {"status_code:2xx"}, true}; + CounterMetric responses_3xx = { + "trace_api.responses", {"status_code:3xx"}, true}; + CounterMetric responses_4xx = { + "trace_api.responses", {"status_code:4xx"}, true}; + CounterMetric responses_5xx = { + "trace_api.responses", {"status_code:5xx"}, true}; - public: - TracerTelemetry(const Clock& clock, const FinalizedTracerConfig& config); - std::string appStarted(); - void captureMetrics(); - std::string heartbeatAndTelemetry(); - - CounterMetric& traces_started() { return traces_started_; }; - CounterMetric& traces_finished() { return traces_finished_; }; - GaugeMetric& active_traces() { return active_traces_; }; + CounterMetric errors_timeout = { + "trace_api.errors", {"type:timeout"}, true}; + CounterMetric errors_network = { + "trace_api.errors", {"type:network"}, true}; + CounterMetric errors_status_code = { + "trace_api.errors", {"type:status_code"}, true}; - CounterMetric& spans_started() { return spans_started_; }; - CounterMetric& spans_finished() { return spans_finished_; }; - GaugeMetric& active_spans() { return active_spans_; }; + } trace_api; + } metrics_; - CounterMetric& trace_api_requests() { return trace_api_requests_; }; + public: + TracerTelemetry(const Clock& clock, const FinalizedTracerConfig& config); + auto& metrics() { return metrics_; }; + std::string app_started(); + 
void capture_metrics(); + std::string heartbeat_and_telemetry(); }; } // namespace tracing From f41f4d40d4dbf5cbb8648b4fdd5a8bf965c221fb Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Fri, 29 Sep 2023 09:11:53 +0000 Subject: [PATCH 03/39] Add tags to metrics payloads. Ignore unused counters. --- src/datadog/tracer_telemetry.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/datadog/tracer_telemetry.cpp b/src/datadog/tracer_telemetry.cpp index 90a49936..379d4d50 100644 --- a/src/datadog/tracer_telemetry.cpp +++ b/src/datadog/tracer_telemetry.cpp @@ -93,7 +93,11 @@ void TracerTelemetry::capture_metrics() { clock_().wall.time_since_epoch()) .count(); for (auto& m : metrics_snapshots_) { - m.second.emplace_back(timepoint, m.first.get().value()); + auto value = m.first.get().value(); + if (value == 0) { + continue; + } + m.second.emplace_back(timepoint, value); } for (auto& m : metrics_snapshots_) { @@ -118,14 +122,18 @@ std::string TracerTelemetry::heartbeat_and_telemetry() { for (auto& m : metrics_snapshots_) { auto& metric = m.first.get(); auto& points = m.second; + if (points.empty()) { + continue; + } + metrics.emplace_back(nlohmann::json::object({ {"metric", metric.name()}, + {"tags", metric.tags()}, {"type", metric.type()}, {"interval", 60}, {"points", points}, {"common", metric.common()}, })); - m.second.clear(); } auto generate_metrics = nlohmann::json::object({ From b91869f7124f9c6da05a61103f67855f008e6852 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Mon, 2 Oct 2023 06:32:35 +0000 Subject: [PATCH 04/39] Remove comforting developer noise --- src/datadog/tracer_telemetry.cpp | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/datadog/tracer_telemetry.cpp b/src/datadog/tracer_telemetry.cpp index 379d4d50..e24c718f 100644 --- a/src/datadog/tracer_telemetry.cpp +++ b/src/datadog/tracer_telemetry.cpp @@ -1,7 +1,5 @@ #include "tracer_telemetry.h" -#include - #include "json.hpp" #include "logger.h" 
#include "platform_util.h" @@ -48,9 +46,6 @@ std::string TracerTelemetry::app_started() { clock_().wall.time_since_epoch()) .count(); std::string hostname = get_hostname().value_or("hostname-unavailable"); - config_.logger->log_error([&](auto& stream) { - stream << "app-started: hostname=" << hostname << " seq_id=" << seq_id; - }); seq_id++; auto payload = @@ -99,13 +94,6 @@ void TracerTelemetry::capture_metrics() { } m.second.emplace_back(timepoint, value); } - - for (auto& m : metrics_snapshots_) { - std::cout << "metrics: " << m.first.get().name() << std::endl; - for (auto& v : m.second) { - std::cout << v.first << " " << v.second << std::endl; - } - } } std::string TracerTelemetry::heartbeat_and_telemetry() { @@ -172,8 +160,6 @@ std::string TracerTelemetry::heartbeat_and_telemetry() { })}, }) .dump(); - config_.logger->log_error( - [&](auto& stream) { stream << "telemetry payload: " << payload; }); return payload; } From 0d4e82f95c236f018980ca8171e82a9be3f1e6cb Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Mon, 2 Oct 2023 06:36:17 +0000 Subject: [PATCH 05/39] Additions for building via bazel --- BUILD.bazel | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/BUILD.bazel b/BUILD.bazel index 00af9de2..56f256fb 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -15,6 +15,7 @@ cc_library( "src/datadog/id_generator.cpp", "src/datadog/limiter.cpp", "src/datadog/logger.cpp", + "src/datadog/metrics.cpp", "src/datadog/msgpack.cpp", "src/datadog/null_collector.cpp", "src/datadog/parse_util.cpp", @@ -32,6 +33,7 @@ cc_library( "src/datadog/tags.cpp", "src/datadog/threaded_event_scheduler.cpp", "src/datadog/tracer_config.cpp", + "src/datadog/tracer_telemetry.cpp", "src/datadog/tracer.cpp", "src/datadog/trace_id.cpp", "src/datadog/trace_sampler_config.cpp", @@ -64,6 +66,7 @@ cc_library( "src/datadog/json_fwd.hpp", "src/datadog/limiter.h", "src/datadog/logger.h", + "src/datadog/metrics.h", "src/datadog/msgpack.h", "src/datadog/null_collector.h", 
"src/datadog/optional.h", @@ -88,6 +91,7 @@ cc_library( "src/datadog/tags.h", "src/datadog/threaded_event_scheduler.h", "src/datadog/tracer_config.h", + "src/datadog/tracer_telemetry.h", "src/datadog/tracer.h", "src/datadog/trace_id.h", "src/datadog/trace_sampler_config.h", @@ -110,4 +114,4 @@ cc_library( "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", ], -) \ No newline at end of file +) From 94bfc20d1dbe257a339718df2e1131afd200d0ee Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Tue, 3 Oct 2023 03:48:23 +0000 Subject: [PATCH 06/39] Adding comments --- src/datadog/metrics.h | 10 ++++++++++ src/datadog/tracer_telemetry.h | 4 +++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/datadog/metrics.h b/src/datadog/metrics.h index 23dafe2e..00208bb5 100644 --- a/src/datadog/metrics.h +++ b/src/datadog/metrics.h @@ -8,9 +8,17 @@ namespace datadog { namespace tracing { class Metric { + // The name of the metric that will be published. A transformation occurs + // based on the name and whether it is "common" or "language-specific" when it + // is recorded. const std::string name_; + // The type of the metric. This will currently be count or gauge. const std::string type_; + // Tags associated with this specific instance of the metric. const std::vector tags_; + // This affects the transformation of the metric name, where it can be a + // common telemetry metric, or a language-specific metric that is prefixed + // with the language name. bool common_; protected: @@ -19,6 +27,8 @@ class Metric { const std::vector tags, bool common); public: + // Accessors for name, type, tags, common and value are used when producing + // the JSON message for reporting metrics. 
const std::string name(); const std::string type(); const std::vector tags(); diff --git a/src/datadog/tracer_telemetry.h b/src/datadog/tracer_telemetry.h index 5453b661..7b87f416 100644 --- a/src/datadog/tracer_telemetry.h +++ b/src/datadog/tracer_telemetry.h @@ -12,9 +12,11 @@ class TracerTelemetry { FinalizedTracerConfig config_; uint64_t seq_id = 0; using MetricSnapshot = std::vector>; + // This uses a reference_wrapper so references to internal metric values can + // be captured, and be iterated trivially when the values need to be + // snapshotted and published in telemetry messages. std::vector, MetricSnapshot>> metrics_snapshots_; - struct { struct { CounterMetric spans_created = { From e13e5d1087f8ff709a56b2c25624117f395e3675 Mon Sep 17 00:00:00 2001 From: David Goffredo Date: Fri, 29 Sep 2023 12:16:11 -0400 Subject: [PATCH 07/39] first pass at a review of cgilmour/telemetry-api: - mention new files in bazel build - don't store FinalizedTracerConfig - consistent spacing_style forMemberFunctions - make config_json() available to send_app_started() - fix unrelated pet peeve in use of log_startup - remove dev noise, which fixed all but one of the broken unit tests --- src/datadog/datadog_agent.cpp | 23 ++++------------------ src/datadog/datadog_agent.h | 5 +++-- src/datadog/tracer.cpp | 14 ++++++++------ src/datadog/tracer.h | 2 +- src/datadog/tracer_telemetry.cpp | 33 ++++++++++++++++++++------------ src/datadog/tracer_telemetry.h | 15 +++++++++++---- test/test_datadog_agent.cpp | 2 ++ 7 files changed, 50 insertions(+), 44 deletions(-) diff --git a/src/datadog/datadog_agent.cpp b/src/datadog/datadog_agent.cpp index eaad31b1..9db1cb40 100644 --- a/src/datadog/datadog_agent.cpp +++ b/src/datadog/datadog_agent.cpp @@ -150,13 +150,12 @@ DatadogAgent::DatadogAgent( n++; tracer_telemetry_->capture_metrics(); if (n % 6 == 0) { - sendHeartbeatAndTelemetry(); + send_heartbeat_and_telemetry(); } })), flush_interval_(config.flush_interval) { assert(logger_); 
assert(tracer_telemetry_); - sendAppStarted(); } DatadogAgent::~DatadogAgent() { @@ -298,8 +297,8 @@ void DatadogAgent::flush() { } } -void DatadogAgent::sendAppStarted() { - auto payload = tracer_telemetry_->app_started(); +void DatadogAgent::send_app_started(nlohmann::json&& tracer_config) { + auto payload = tracer_telemetry_->app_started(std::move(tracer_config)); auto set_request_headers = [&](DictWriter& headers) { headers.set("Content-Type", "application/json"); }; @@ -314,13 +313,6 @@ void DatadogAgent::sendAppStarted() { << " with body (starts on next line):\n" << response_body; }); - return; - } else { - logger->log_error([&](auto& stream) { - stream << "Successful telemetry submission with response status " - << response_status << " and body (starts on next line):\n" - << response_body; - }); } }; @@ -338,7 +330,7 @@ void DatadogAgent::sendAppStarted() { } } -void DatadogAgent::sendHeartbeatAndTelemetry() { +void DatadogAgent::send_heartbeat_and_telemetry() { auto payload = tracer_telemetry_->heartbeat_and_telemetry(); auto set_request_headers = [&](DictWriter& headers) { headers.set("Content-Type", "application/json"); @@ -354,13 +346,6 @@ void DatadogAgent::sendHeartbeatAndTelemetry() { << " with body (starts on next line):\n" << response_body; }); - return; - } else { - logger->log_error([&](auto& stream) { - stream << "Successful telemetry submission with response status " - << response_status << " and body (starts on next line):\n" - << response_body; - }); } }; diff --git a/src/datadog/datadog_agent.h b/src/datadog/datadog_agent.h index bd33e27c..6a996447 100644 --- a/src/datadog/datadog_agent.h +++ b/src/datadog/datadog_agent.h @@ -47,6 +47,7 @@ class DatadogAgent : public Collector { std::chrono::steady_clock::duration flush_interval_; void flush(); + void send_heartbeat_and_telemetry(); public: DatadogAgent(const FinalizedDatadogAgentConfig&, @@ -57,8 +58,8 @@ class DatadogAgent : public Collector { Expected send( std::vector>&& spans, const 
std::shared_ptr& response_handler) override; - void sendAppStarted(); - void sendHeartbeatAndTelemetry(); + + void send_app_started(nlohmann::json&& tracer_config); nlohmann::json config_json() const override; }; diff --git a/src/datadog/tracer.cpp b/src/datadog/tracer.cpp index b0c1e96b..704bf691 100644 --- a/src/datadog/tracer.cpp +++ b/src/datadog/tracer.cpp @@ -243,13 +243,14 @@ Tracer::Tracer(const FinalizedTracerConfig& config, const Clock& clock) : logger_(config.logger), collector_(/* see constructor body */), - tracer_telemetry_(std::make_shared(clock, config)), + defaults_(std::make_shared(config.defaults)), + tracer_telemetry_( + std::make_shared(clock, logger_, defaults_)), trace_sampler_( std::make_shared(config.trace_sampler, clock)), span_sampler_(std::make_shared(config.span_sampler, clock)), generator_(generator), clock_(clock), - defaults_(std::make_shared(config.defaults)), injection_styles_(config.injection_styles), extraction_styles_(config.extraction_styles), hostname_(config.report_hostname ? 
get_hostname() : nullopt), @@ -260,14 +261,15 @@ Tracer::Tracer(const FinalizedTracerConfig& config, } else { auto& agent_config = std::get(config.collector); - collector_ = std::make_shared(agent_config, tracer_telemetry_, + auto agent = std::make_shared(agent_config, tracer_telemetry_, clock, config.logger); + collector_ = agent; + agent->send_app_started(config_json()); } if (config.log_on_startup) { - auto json = config_json(); - logger_->log_startup([&json](std::ostream& log) { - log << "DATADOG TRACER CONFIGURATION - " << json; + logger_->log_startup([this](std::ostream& log) { + log << "DATADOG TRACER CONFIGURATION - " << config_json(); }); } } diff --git a/src/datadog/tracer.h b/src/datadog/tracer.h index a473daf5..de5742fc 100644 --- a/src/datadog/tracer.h +++ b/src/datadog/tracer.h @@ -31,12 +31,12 @@ class SpanSampler; class Tracer { std::shared_ptr logger_; std::shared_ptr collector_; + std::shared_ptr defaults_; std::shared_ptr tracer_telemetry_; std::shared_ptr trace_sampler_; std::shared_ptr span_sampler_; std::shared_ptr generator_; Clock clock_; - std::shared_ptr defaults_; std::vector injection_styles_; std::vector extraction_styles_; Optional hostname_; diff --git a/src/datadog/tracer_telemetry.cpp b/src/datadog/tracer_telemetry.cpp index e24c718f..416721d7 100644 --- a/src/datadog/tracer_telemetry.cpp +++ b/src/datadog/tracer_telemetry.cpp @@ -3,14 +3,16 @@ #include "json.hpp" #include "logger.h" #include "platform_util.h" +#include "span_defaults.h" #include "version.h" namespace datadog { namespace tracing { -TracerTelemetry::TracerTelemetry(const Clock& clock, - const FinalizedTracerConfig& config) - : clock_(clock), config_(config) { +TracerTelemetry::TracerTelemetry( + const Clock& clock, const std::shared_ptr& logger, + const std::shared_ptr& span_defaults) + : clock_(clock), logger_(logger), span_defaults_(span_defaults) { metrics_snapshots_.emplace_back(metrics_.tracer.spans_created, MetricSnapshot{}); 
metrics_snapshots_.emplace_back(metrics_.tracer.spans_finished, @@ -41,12 +43,11 @@ TracerTelemetry::TracerTelemetry(const Clock& clock, MetricSnapshot{}); } -std::string TracerTelemetry::app_started() { +std::string TracerTelemetry::app_started(nlohmann::json&& tracer_config) { time_t tracer_time = std::chrono::duration_cast( clock_().wall.time_since_epoch()) .count(); - std::string hostname = get_hostname().value_or("hostname-unavailable"); - + seq_id++; auto payload = nlohmann::json::object( @@ -59,15 +60,16 @@ std::string TracerTelemetry::app_started() { {"debug", true}, {"application", nlohmann::json::object({ - {"service_name", config_.defaults.service}, - {"env", config_.defaults.environment}, + {"service_name", span_defaults_->service}, + {"env", span_defaults_->environment}, {"tracer_version", tracer_version_string}, {"language_name", "cpp"}, {"language_version", std::to_string(__cplusplus)}, })}, - // TODO: host information (hostname, os, os_version, kernel, etc) + // TODO: host information (os, os_version, kernel, etc) {"host", nlohmann::json::object({ - {"hostname", hostname}, + {"hostname", + get_hostname().value_or("hostname-unavailable")}, })}, {"payload", nlohmann::json::object({ @@ -77,6 +79,13 @@ std::string TracerTelemetry::app_started() { })}, })}, + // TODO: Until we figure out "configuration", above, include a + // JSON dump of the tracer configuration as "additional_payload". 
+ {"additional_payload", + nlohmann::json::array({nlohmann::json::object({ + {"name", "tracer_config_json"}, + {"value", tracer_config.dump()}, + })})}, }) .dump(); @@ -144,8 +153,8 @@ std::string TracerTelemetry::heartbeat_and_telemetry() { {"debug", true}, {"application", nlohmann::json::object({ - {"service_name", config_.defaults.service}, - {"env", config_.defaults.environment}, + {"service_name", span_defaults_->service}, + {"env", span_defaults_->environment}, {"tracer_version", tracer_version_string}, {"language_name", "cpp"}, {"language_version", std::to_string(__cplusplus)}, diff --git a/src/datadog/tracer_telemetry.h b/src/datadog/tracer_telemetry.h index 7b87f416..1bd453c5 100644 --- a/src/datadog/tracer_telemetry.h +++ b/src/datadog/tracer_telemetry.h @@ -1,15 +1,21 @@ #pragma once +#include #include #include "clock.h" +#include "json_fwd.hpp" #include "metrics.h" -#include "tracer_config.h" namespace datadog { namespace tracing { + +class Logger; +class SpanDefaults; + class TracerTelemetry { Clock clock_; - FinalizedTracerConfig config_; + std::shared_ptr logger_; + std::shared_ptr span_defaults_; uint64_t seq_id = 0; using MetricSnapshot = std::vector>; // This uses a reference_wrapper so references to internal metric values can @@ -56,9 +62,10 @@ class TracerTelemetry { } metrics_; public: - TracerTelemetry(const Clock& clock, const FinalizedTracerConfig& config); + TracerTelemetry(const Clock& clock, const std::shared_ptr& logger, + const std::shared_ptr& span_defaults); auto& metrics() { return metrics_; }; - std::string app_started(); + std::string app_started(nlohmann::json&& tracer_config); void capture_metrics(); std::string heartbeat_and_telemetry(); }; diff --git a/test/test_datadog_agent.cpp b/test/test_datadog_agent.cpp index e194d034..12b371d6 100644 --- a/test/test_datadog_agent.cpp +++ b/test/test_datadog_agent.cpp @@ -140,6 +140,7 @@ TEST_CASE("CollectorResponse") { (void)span; } REQUIRE(event_scheduler->cancelled); + 
CAPTURE(logger->entries); REQUIRE(logger->error_count() == 1); REQUIRE(logger->first_error().code == error.code); } @@ -156,6 +157,7 @@ TEST_CASE("CollectorResponse") { (void)span; } REQUIRE(event_scheduler->cancelled); + // REVIEW: this fails since the addition of telemetry REQUIRE(logger->error_count() == 1); REQUIRE(logger->first_error().code == error.code); } From 9e432cb0137fa93f3ea59cf4ce3e5d71440f3683 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Tue, 3 Oct 2023 04:02:11 +0000 Subject: [PATCH 08/39] Store value of hostname in member field --- src/datadog/tracer_telemetry.cpp | 14 +++++++------- src/datadog/tracer_telemetry.h | 1 + 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/datadog/tracer_telemetry.cpp b/src/datadog/tracer_telemetry.cpp index 416721d7..95c82726 100644 --- a/src/datadog/tracer_telemetry.cpp +++ b/src/datadog/tracer_telemetry.cpp @@ -12,7 +12,10 @@ namespace tracing { TracerTelemetry::TracerTelemetry( const Clock& clock, const std::shared_ptr& logger, const std::shared_ptr& span_defaults) - : clock_(clock), logger_(logger), span_defaults_(span_defaults) { + : clock_(clock), + logger_(logger), + span_defaults_(span_defaults), + hostname_(get_hostname().value_or("hostname-unavailable")) { metrics_snapshots_.emplace_back(metrics_.tracer.spans_created, MetricSnapshot{}); metrics_snapshots_.emplace_back(metrics_.tracer.spans_finished, @@ -47,7 +50,7 @@ std::string TracerTelemetry::app_started(nlohmann::json&& tracer_config) { time_t tracer_time = std::chrono::duration_cast( clock_().wall.time_since_epoch()) .count(); - + seq_id++; auto payload = nlohmann::json::object( @@ -68,8 +71,7 @@ std::string TracerTelemetry::app_started(nlohmann::json&& tracer_config) { })}, // TODO: host information (os, os_version, kernel, etc) {"host", nlohmann::json::object({ - {"hostname", - get_hostname().value_or("hostname-unavailable")}, + {"hostname", hostname_}, })}, {"payload", nlohmann::json::object({ @@ -109,8 +111,6 @@ std::string 
TracerTelemetry::heartbeat_and_telemetry() { time_t tracer_time = std::chrono::duration_cast( clock_().wall.time_since_epoch()) .count(); - std::string hostname = get_hostname().value_or("hostname-unavailable"); - auto heartbeat = nlohmann::json::object({ {"request_type", "app-heartbeat"}, }); @@ -161,7 +161,7 @@ std::string TracerTelemetry::heartbeat_and_telemetry() { })}, // TODO: host information (hostname, os, os_version, kernel, etc) {"host", nlohmann::json::object({ - {"hostname", hostname}, + {"hostname", hostname_}, })}, {"payload", nlohmann::json::array({ heartbeat, diff --git a/src/datadog/tracer_telemetry.h b/src/datadog/tracer_telemetry.h index 1bd453c5..07a39afd 100644 --- a/src/datadog/tracer_telemetry.h +++ b/src/datadog/tracer_telemetry.h @@ -16,6 +16,7 @@ class TracerTelemetry { Clock clock_; std::shared_ptr logger_; std::shared_ptr span_defaults_; + std::string hostname_; uint64_t seq_id = 0; using MetricSnapshot = std::vector>; // This uses a reference_wrapper so references to internal metric values can From 0005e6544e4054bc8f69f495b9b5437920c3ccdd Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Tue, 3 Oct 2023 04:11:06 +0000 Subject: [PATCH 09/39] Refactor runtime_id into span defaults, available to tracer telemetry --- src/datadog/span_defaults.cpp | 1 + src/datadog/span_defaults.h | 2 ++ src/datadog/trace_segment.cpp | 10 +++------- src/datadog/tracer_telemetry.cpp | 4 ++-- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/datadog/span_defaults.cpp b/src/datadog/span_defaults.cpp index 387eaf54..44ba6306 100644 --- a/src/datadog/span_defaults.cpp +++ b/src/datadog/span_defaults.cpp @@ -22,6 +22,7 @@ nlohmann::json to_json(const SpanDefaults& defaults) { TO_JSON(version); TO_JSON(name); TO_JSON(tags); + TO_JSON(runtime_id); #undef TO_JSON return result; } diff --git a/src/datadog/span_defaults.h b/src/datadog/span_defaults.h index 1c9b4fd2..0cf81a00 100644 --- a/src/datadog/span_defaults.h +++ b/src/datadog/span_defaults.h @@ 
-8,6 +8,7 @@ #include #include "json_fwd.hpp" +#include "random.h" namespace datadog { namespace tracing { @@ -19,6 +20,7 @@ struct SpanDefaults { std::string version = ""; std::string name = ""; std::unordered_map tags; + std::string runtime_id = uuid(); }; nlohmann::json to_json(const SpanDefaults&); diff --git a/src/datadog/trace_segment.cpp b/src/datadog/trace_segment.cpp index 66ff362d..d476a3fb 100644 --- a/src/datadog/trace_segment.cpp +++ b/src/datadog/trace_segment.cpp @@ -16,6 +16,7 @@ #include "platform_util.h" #include "random.h" #include "span_data.h" +#include "span_defaults.h" #include "span_sampler.h" #include "tag_propagation.h" #include "tags.h" @@ -28,12 +29,8 @@ namespace { struct Cache { static int process_id; - static std::string runtime_id; - static void recalculate_values() { - process_id = get_process_id(); - runtime_id = uuid(); - } + static void recalculate_values() { process_id = get_process_id(); } Cache() { recalculate_values(); @@ -42,7 +39,6 @@ struct Cache { }; int Cache::process_id; -std::string Cache::runtime_id; // `cache_singleton` exists solely to invoke `Cache`'s constructor. // All data members are static, so use e.g. 
`Cache::process_id` instead of @@ -218,7 +214,7 @@ void TraceSegment::span_finished() { } span.numeric_tags[tags::internal::process_id] = Cache::process_id; span.tags[tags::internal::language] = "cpp"; - span.tags[tags::internal::runtime_id] = Cache::runtime_id; + span.tags[tags::internal::runtime_id] = defaults_->runtime_id; } const auto result = collector_->send(std::move(spans_), trace_sampler_); diff --git a/src/datadog/tracer_telemetry.cpp b/src/datadog/tracer_telemetry.cpp index 95c82726..e89f1774 100644 --- a/src/datadog/tracer_telemetry.cpp +++ b/src/datadog/tracer_telemetry.cpp @@ -59,7 +59,7 @@ std::string TracerTelemetry::app_started(nlohmann::json&& tracer_config) { {"seq_id", seq_id}, {"request_type", "app-started"}, {"tracer_time", tracer_time}, - {"runtime_id", "0524398a-11e2-4375-a637-619eb9148e8f"}, + {"runtime_id", span_defaults_->runtime_id}, {"debug", true}, {"application", nlohmann::json::object({ @@ -149,7 +149,7 @@ std::string TracerTelemetry::heartbeat_and_telemetry() { {"seq_id", seq_id}, {"request_type", "message-batch"}, {"tracer_time", tracer_time}, - {"runtime_id", "0524398a-11e2-4375-a637-619eb9148e8f"}, + {"runtime_id", span_defaults_->runtime_id}, {"debug", true}, {"application", nlohmann::json::object({ From e54bd30dd9f5390427c87b3e71ce0936f36188cf Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Tue, 3 Oct 2023 10:25:04 +0000 Subject: [PATCH 10/39] Change count metrics to report absolute instead of cumulative value --- src/datadog/metrics.cpp | 2 +- src/datadog/tracer_telemetry.cpp | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/datadog/metrics.cpp b/src/datadog/metrics.cpp index 8461eecd..2683d8e9 100644 --- a/src/datadog/metrics.cpp +++ b/src/datadog/metrics.cpp @@ -12,7 +12,7 @@ const std::string Metric::name() { return name_; } const std::string Metric::type() { return type_; } const std::vector Metric::tags() { return tags_; } bool Metric::common() { return common_; } -uint64_t Metric::value() { 
return value_; } +uint64_t Metric::value() { return value_.exchange(0); } CounterMetric::CounterMetric(const std::string name, const std::vector tags, bool common) diff --git a/src/datadog/tracer_telemetry.cpp b/src/datadog/tracer_telemetry.cpp index e89f1774..89da8bb3 100644 --- a/src/datadog/tracer_telemetry.cpp +++ b/src/datadog/tracer_telemetry.cpp @@ -100,9 +100,6 @@ void TracerTelemetry::capture_metrics() { .count(); for (auto& m : metrics_snapshots_) { auto value = m.first.get().value(); - if (value == 0) { - continue; - } m.second.emplace_back(timepoint, value); } } From 19e3e43676075513795f938eb2f1a48e526e2bc3 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Fri, 6 Oct 2023 03:27:24 +0000 Subject: [PATCH 11/39] Allow telemetry to be enabled/disabled using DD_INSTRUMENTATION_TELEMETRY_ENABLED --- src/datadog/datadog_agent.cpp | 19 +++++----- src/datadog/environment.h | 1 + src/datadog/tracer.cpp | 8 ++-- src/datadog/tracer_config.cpp | 7 ++++ src/datadog/tracer_config.h | 8 ++++ src/datadog/tracer_telemetry.cpp | 63 +++++++++++++++++--------------- src/datadog/tracer_telemetry.h | 5 ++- 7 files changed, 68 insertions(+), 43 deletions(-) diff --git a/src/datadog/datadog_agent.cpp b/src/datadog/datadog_agent.cpp index 9db1cb40..1fd6bc57 100644 --- a/src/datadog/datadog_agent.cpp +++ b/src/datadog/datadog_agent.cpp @@ -144,18 +144,19 @@ DatadogAgent::DatadogAgent( event_scheduler_(config.event_scheduler), cancel_scheduled_flush_(event_scheduler_->schedule_recurring_event( config.flush_interval, [this]() { flush(); })), - cancel_telemetry_timer_(event_scheduler_->schedule_recurring_event( - std::chrono::seconds(10), - [this, n = 0]() mutable { - n++; - tracer_telemetry_->capture_metrics(); - if (n % 6 == 0) { - send_heartbeat_and_telemetry(); - } - })), flush_interval_(config.flush_interval) { assert(logger_); assert(tracer_telemetry_); + if (tracer_telemetry_->enabled()) { + cancel_telemetry_timer_ = event_scheduler_->schedule_recurring_event( + 
std::chrono::seconds(10), [this, n = 0]() mutable { + n++; + tracer_telemetry_->capture_metrics(); + if (n % 6 == 0) { + send_heartbeat_and_telemetry(); + } + }); + } } DatadogAgent::~DatadogAgent() { diff --git a/src/datadog/environment.h b/src/datadog/environment.h index 9bba362c..f8800d8c 100644 --- a/src/datadog/environment.h +++ b/src/datadog/environment.h @@ -27,6 +27,7 @@ namespace environment { #define LIST_ENVIRONMENT_VARIABLES(MACRO) \ MACRO(DD_AGENT_HOST) \ MACRO(DD_ENV) \ + MACRO(DD_INSTRUMENTATION_TELEMETRY_ENABLED) \ MACRO(DD_PROPAGATION_STYLE_EXTRACT) \ MACRO(DD_PROPAGATION_STYLE_INJECT) \ MACRO(DD_TRACE_PROPAGATION_STYLE_EXTRACT) \ diff --git a/src/datadog/tracer.cpp b/src/datadog/tracer.cpp index 704bf691..3da0df4d 100644 --- a/src/datadog/tracer.cpp +++ b/src/datadog/tracer.cpp @@ -244,8 +244,8 @@ Tracer::Tracer(const FinalizedTracerConfig& config, : logger_(config.logger), collector_(/* see constructor body */), defaults_(std::make_shared(config.defaults)), - tracer_telemetry_( - std::make_shared(clock, logger_, defaults_)), + tracer_telemetry_(std::make_shared( + config.report_telemetry, clock, logger_, defaults_)), trace_sampler_( std::make_shared(config.trace_sampler, clock)), span_sampler_(std::make_shared(config.span_sampler, clock)), @@ -264,7 +264,9 @@ Tracer::Tracer(const FinalizedTracerConfig& config, auto agent = std::make_shared(agent_config, tracer_telemetry_, clock, config.logger); collector_ = agent; - agent->send_app_started(config_json()); + if (tracer_telemetry_->enabled()) { + agent->send_app_started(config_json()); + } } if (config.log_on_startup) { diff --git a/src/datadog/tracer_config.cpp b/src/datadog/tracer_config.cpp index c4445a03..7dfd6f24 100644 --- a/src/datadog/tracer_config.cpp +++ b/src/datadog/tracer_config.cpp @@ -323,6 +323,13 @@ Expected finalize_config(const TracerConfig &config) { result.collector = config.collector; } + bool report_telemetry = config.report_telemetry; + if (auto enabled_env = + 
lookup(environment::DD_INSTRUMENTATION_TELEMETRY_ENABLED)) { + report_telemetry = !falsy(*enabled_env); + } + result.report_telemetry = report_telemetry; + if (auto trace_sampler_config = finalize_config(config.trace_sampler)) { result.trace_sampler = std::move(*trace_sampler_config); } else { diff --git a/src/datadog/tracer_config.h b/src/datadog/tracer_config.h index 7382d87f..cf103e5c 100644 --- a/src/datadog/tracer_config.h +++ b/src/datadog/tracer_config.h @@ -50,6 +50,13 @@ struct TracerConfig { // variable. bool report_traces = true; + // `report_telemetry` indicates whether telemetry about the tracer will be + // sent to a collector (`true`) or discarded on completion (`false`). If + // `report_telemetry` is `false`, then this feature is disabled. + // `report_telemetry` is overridden by the + // `DD_INSTRUMENTATION_TELEMETRY_ENABLED` environment variable. + bool report_telemetry = true; + // `trace_sampler` configures trace sampling. Trace sampling determines which // traces are sent to Datadog. See `trace_sampler_config.h`. 
TraceSamplerConfig trace_sampler; @@ -130,6 +137,7 @@ class FinalizedTracerConfig { std::shared_ptr logger; bool log_on_startup; bool trace_id_128_bit; + bool report_telemetry; std::string runtime_id = uuid(); }; diff --git a/src/datadog/tracer_telemetry.cpp b/src/datadog/tracer_telemetry.cpp index 89da8bb3..4e4ed5d8 100644 --- a/src/datadog/tracer_telemetry.cpp +++ b/src/datadog/tracer_telemetry.cpp @@ -10,40 +10,43 @@ namespace datadog { namespace tracing { TracerTelemetry::TracerTelemetry( - const Clock& clock, const std::shared_ptr& logger, + bool enabled, const Clock& clock, const std::shared_ptr& logger, const std::shared_ptr& span_defaults) - : clock_(clock), + : enabled_(enabled), + clock_(clock), logger_(logger), span_defaults_(span_defaults), hostname_(get_hostname().value_or("hostname-unavailable")) { - metrics_snapshots_.emplace_back(metrics_.tracer.spans_created, - MetricSnapshot{}); - metrics_snapshots_.emplace_back(metrics_.tracer.spans_finished, - MetricSnapshot{}); - metrics_snapshots_.emplace_back(metrics_.tracer.trace_segments_created_new, - MetricSnapshot{}); - metrics_snapshots_.emplace_back( - metrics_.tracer.trace_segments_created_continued, MetricSnapshot{}); - metrics_snapshots_.emplace_back(metrics_.tracer.trace_segments_closed, - MetricSnapshot{}); - metrics_snapshots_.emplace_back(metrics_.trace_api.requests, - MetricSnapshot{}); - metrics_snapshots_.emplace_back(metrics_.trace_api.responses_1xx, - MetricSnapshot{}); - metrics_snapshots_.emplace_back(metrics_.trace_api.responses_2xx, - MetricSnapshot{}); - metrics_snapshots_.emplace_back(metrics_.trace_api.responses_3xx, - MetricSnapshot{}); - metrics_snapshots_.emplace_back(metrics_.trace_api.responses_4xx, - MetricSnapshot{}); - metrics_snapshots_.emplace_back(metrics_.trace_api.responses_5xx, - MetricSnapshot{}); - metrics_snapshots_.emplace_back(metrics_.trace_api.errors_timeout, - MetricSnapshot{}); - metrics_snapshots_.emplace_back(metrics_.trace_api.errors_network, - 
MetricSnapshot{}); - metrics_snapshots_.emplace_back(metrics_.trace_api.errors_status_code, - MetricSnapshot{}); + if (enabled_) { + metrics_snapshots_.emplace_back(metrics_.tracer.spans_created, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.tracer.spans_finished, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.tracer.trace_segments_created_new, + MetricSnapshot{}); + metrics_snapshots_.emplace_back( + metrics_.tracer.trace_segments_created_continued, MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.tracer.trace_segments_closed, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.trace_api.requests, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.trace_api.responses_1xx, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.trace_api.responses_2xx, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.trace_api.responses_3xx, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.trace_api.responses_4xx, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.trace_api.responses_5xx, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.trace_api.errors_timeout, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.trace_api.errors_network, + MetricSnapshot{}); + metrics_snapshots_.emplace_back(metrics_.trace_api.errors_status_code, + MetricSnapshot{}); + } } std::string TracerTelemetry::app_started(nlohmann::json&& tracer_config) { diff --git a/src/datadog/tracer_telemetry.h b/src/datadog/tracer_telemetry.h index 07a39afd..42be500e 100644 --- a/src/datadog/tracer_telemetry.h +++ b/src/datadog/tracer_telemetry.h @@ -13,6 +13,7 @@ class Logger; class SpanDefaults; class TracerTelemetry { + bool enabled_ = false; Clock clock_; std::shared_ptr logger_; std::shared_ptr span_defaults_; @@ -63,8 +64,10 @@ class TracerTelemetry { } metrics_; public: - TracerTelemetry(const Clock& clock, const std::shared_ptr& logger, + TracerTelemetry(bool 
enabled, const Clock& clock, + const std::shared_ptr& logger, const std::shared_ptr& span_defaults); + bool enabled() { return enabled_; }; auto& metrics() { return metrics_; }; std::string app_started(nlohmann::json&& tracer_config); void capture_metrics(); From 5603789e5d7beaf9d7e9bd6a74dc9134fce9e098 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Fri, 6 Oct 2023 03:29:34 +0000 Subject: [PATCH 12/39] Add debug option, for developer convenience --- src/datadog/tracer_telemetry.cpp | 4 ++-- src/datadog/tracer_telemetry.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/datadog/tracer_telemetry.cpp b/src/datadog/tracer_telemetry.cpp index 4e4ed5d8..3ecb0cfc 100644 --- a/src/datadog/tracer_telemetry.cpp +++ b/src/datadog/tracer_telemetry.cpp @@ -63,7 +63,7 @@ std::string TracerTelemetry::app_started(nlohmann::json&& tracer_config) { {"request_type", "app-started"}, {"tracer_time", tracer_time}, {"runtime_id", span_defaults_->runtime_id}, - {"debug", true}, + {"debug", debug_}, {"application", nlohmann::json::object({ {"service_name", span_defaults_->service}, @@ -150,7 +150,7 @@ std::string TracerTelemetry::heartbeat_and_telemetry() { {"request_type", "message-batch"}, {"tracer_time", tracer_time}, {"runtime_id", span_defaults_->runtime_id}, - {"debug", true}, + {"debug", debug_}, {"application", nlohmann::json::object({ {"service_name", span_defaults_->service}, diff --git a/src/datadog/tracer_telemetry.h b/src/datadog/tracer_telemetry.h index 42be500e..711c7c19 100644 --- a/src/datadog/tracer_telemetry.h +++ b/src/datadog/tracer_telemetry.h @@ -14,6 +14,7 @@ class SpanDefaults; class TracerTelemetry { bool enabled_ = false; + bool debug_ = false; Clock clock_; std::shared_ptr logger_; std::shared_ptr span_defaults_; From 1fd88bdc298ecf39d5ed751f8a512bd26145ed09 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Fri, 6 Oct 2023 05:39:30 +0000 Subject: [PATCH 13/39] Call capture_and_reset_value() instead of having reset behavior inside of 
value() --- src/datadog/metrics.cpp | 3 ++- src/datadog/metrics.h | 1 + src/datadog/tracer_telemetry.cpp | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/datadog/metrics.cpp b/src/datadog/metrics.cpp index 2683d8e9..44468c17 100644 --- a/src/datadog/metrics.cpp +++ b/src/datadog/metrics.cpp @@ -12,7 +12,8 @@ const std::string Metric::name() { return name_; } const std::string Metric::type() { return type_; } const std::vector Metric::tags() { return tags_; } bool Metric::common() { return common_; } -uint64_t Metric::value() { return value_.exchange(0); } +uint64_t Metric::value() { return value_; } +uint64_t Metric::capture_and_reset_value() { return value_.exchange(0); } CounterMetric::CounterMetric(const std::string name, const std::vector tags, bool common) diff --git a/src/datadog/metrics.h b/src/datadog/metrics.h index 00208bb5..b6dd3c0c 100644 --- a/src/datadog/metrics.h +++ b/src/datadog/metrics.h @@ -34,6 +34,7 @@ class Metric { const std::vector tags(); bool common(); uint64_t value(); + uint64_t capture_and_reset_value(); }; class CounterMetric : public Metric { diff --git a/src/datadog/tracer_telemetry.cpp b/src/datadog/tracer_telemetry.cpp index 3ecb0cfc..7c6529c6 100644 --- a/src/datadog/tracer_telemetry.cpp +++ b/src/datadog/tracer_telemetry.cpp @@ -102,7 +102,7 @@ void TracerTelemetry::capture_metrics() { clock_().wall.time_since_epoch()) .count(); for (auto& m : metrics_snapshots_) { - auto value = m.first.get().value(); + auto value = m.first.get().capture_and_reset_value(); m.second.emplace_back(timepoint, value); } } From 8cf6d49cbce96446aeced856aaa8d0dbabfcf413 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Fri, 6 Oct 2023 11:08:54 +0000 Subject: [PATCH 14/39] Omit zero values, and sending empty generate-metrics payloads --- src/datadog/tracer_telemetry.cpp | 49 ++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/src/datadog/tracer_telemetry.cpp 
b/src/datadog/tracer_telemetry.cpp index 7c6529c6..97b08b04 100644 --- a/src/datadog/tracer_telemetry.cpp +++ b/src/datadog/tracer_telemetry.cpp @@ -103,6 +103,9 @@ void TracerTelemetry::capture_metrics() { .count(); for (auto& m : metrics_snapshots_) { auto value = m.first.get().capture_and_reset_value(); + if (value == 0) { + continue; + } m.second.emplace_back(timepoint, value); } } @@ -111,35 +114,40 @@ std::string TracerTelemetry::heartbeat_and_telemetry() { time_t tracer_time = std::chrono::duration_cast( clock_().wall.time_since_epoch()) .count(); + auto batch_payloads = nlohmann::json::array(); + auto heartbeat = nlohmann::json::object({ {"request_type", "app-heartbeat"}, }); + batch_payloads.emplace_back(std::move(heartbeat)); auto metrics = nlohmann::json::array(); for (auto& m : metrics_snapshots_) { auto& metric = m.first.get(); auto& points = m.second; - if (points.empty()) { - continue; + if (!points.empty()) { + metrics.emplace_back(nlohmann::json::object({ + {"metric", metric.name()}, + {"tags", metric.tags()}, + {"type", metric.type()}, + {"interval", 60}, + {"points", points}, + {"common", metric.common()}, + })); } - - metrics.emplace_back(nlohmann::json::object({ - {"metric", metric.name()}, - {"tags", metric.tags()}, - {"type", metric.type()}, - {"interval", 60}, - {"points", points}, - {"common", metric.common()}, - })); + points.clear(); } - auto generate_metrics = nlohmann::json::object({ - {"request_type", "generate-metrics"}, - {"payload", nlohmann::json::object({ - {"namespace", "tracers"}, - {"series", metrics}, - })}, - }); + if (!metrics.empty()) { + auto generate_metrics = nlohmann::json::object({ + {"request_type", "generate-metrics"}, + {"payload", nlohmann::json::object({ + {"namespace", "tracers"}, + {"series", metrics}, + })}, + }); + batch_payloads.emplace_back(std::move(generate_metrics)); + } seq_id++; auto payload = @@ -163,10 +171,7 @@ std::string TracerTelemetry::heartbeat_and_telemetry() { {"host", nlohmann::json::object({ 
{"hostname", hostname_}, })}, - {"payload", nlohmann::json::array({ - heartbeat, - generate_metrics, - })}, + {"payload", batch_payloads}, }) .dump(); return payload; From 70ad77c4a8207245da4ddda0d064f3593be00233 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Sun, 8 Oct 2023 01:43:35 +0000 Subject: [PATCH 15/39] Comments --- src/datadog/metrics.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/datadog/metrics.h b/src/datadog/metrics.h index b6dd3c0c..ad994fde 100644 --- a/src/datadog/metrics.h +++ b/src/datadog/metrics.h @@ -1,5 +1,11 @@ #pragma once +// This component provides an interface, `Metric`, and specific classes for +// Counter and Gauge metrics. A metric has a name, type, and set of key:value +// tags associated with it. Metrics can be general to APM or language-specific. +// General metrics have `common` set to `true`, and language-specific metrics +// have `common` set to `false`. + #include #include #include @@ -27,8 +33,8 @@ class Metric { const std::vector tags, bool common); public: - // Accessors for name, type, tags, common and value are used when producing - // the JSON message for reporting metrics. + // Accessors for name, type, tags, common and capture_and_reset_value are used + // when producing the JSON message for reporting metrics. const std::string name(); const std::string type(); const std::vector tags(); @@ -37,6 +43,8 @@ class Metric { uint64_t capture_and_reset_value(); }; +// A count metric is used for measuring activity, and has methods for adding a +// number of actions, or incrementing the current number of actions by 1. class CounterMetric : public Metric { public: CounterMetric(const std::string name, const std::vector tags, @@ -45,6 +53,9 @@ class CounterMetric : public Metric { void add(uint64_t amount); }; +// A gauge metric is used for measuring state, and mas methods to set the +// current state, add or subtract from it, or increment/decrement the current +// state by 1. 
class GaugeMetric : public Metric { public: GaugeMetric(const std::string name, const std::vector tags, From 32b78852543e8c2d07d887476bf1c74d9352dc35 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Tue, 10 Oct 2023 01:42:49 +0000 Subject: [PATCH 16/39] Add app-closing event --- src/datadog/datadog_agent.cpp | 39 +++++++++++++++++- src/datadog/datadog_agent.h | 1 + src/datadog/tracer_telemetry.cpp | 68 +++++++++++++++++++++++++++++++- src/datadog/tracer_telemetry.h | 1 + 4 files changed, 107 insertions(+), 2 deletions(-) diff --git a/src/datadog/datadog_agent.cpp b/src/datadog/datadog_agent.cpp index 1fd6bc57..5cf68b9f 100644 --- a/src/datadog/datadog_agent.cpp +++ b/src/datadog/datadog_agent.cpp @@ -163,7 +163,11 @@ DatadogAgent::~DatadogAgent() { const auto deadline = clock_().tick + std::chrono::seconds(2); cancel_scheduled_flush_(); flush(); - cancel_telemetry_timer_(); + if (tracer_telemetry_->enabled()) { + cancel_telemetry_timer_(); + tracer_telemetry_->capture_metrics(); + send_app_closing(); + } http_client_->drain(deadline); } @@ -364,5 +368,38 @@ void DatadogAgent::send_heartbeat_and_telemetry() { } } +void DatadogAgent::send_app_closing() { + auto payload = tracer_telemetry_->app_closing(); + auto set_request_headers = [&](DictWriter& headers) { + headers.set("Content-Type", "application/json"); + }; + + // Callback for a successful HTTP request, to examine HTTP status. + auto on_response = [logger = logger_](int response_status, + const DictReader& /*response_headers*/, + std::string response_body) { + if (response_status < 200 || response_status >= 300) { + logger->log_error([&](auto& stream) { + stream << "Unexpected telemetry response status " << response_status + << " with body (starts on next line):\n" + << response_body; + }); + } + }; + + // Callback for unsuccessful HTTP request. 
+ auto on_error = [logger = logger_](Error error) { + logger->log_error(error.with_prefix( + "Error occurred during HTTP request for telemetry: ")); + }; + + auto post_result = http_client_->post( + telemetry_endpoint_, std::move(set_request_headers), std::move(payload), + std::move(on_response), std::move(on_error)); + if (auto* error = post_result.if_error()) { + logger_->log_error(*error); + } +} + } // namespace tracing } // namespace datadog diff --git a/src/datadog/datadog_agent.h b/src/datadog/datadog_agent.h index 6a996447..364d6949 100644 --- a/src/datadog/datadog_agent.h +++ b/src/datadog/datadog_agent.h @@ -48,6 +48,7 @@ class DatadogAgent : public Collector { void flush(); void send_heartbeat_and_telemetry(); + void send_app_closing(); public: DatadogAgent(const FinalizedDatadogAgentConfig&, diff --git a/src/datadog/tracer_telemetry.cpp b/src/datadog/tracer_telemetry.cpp index 97b08b04..46478cb3 100644 --- a/src/datadog/tracer_telemetry.cpp +++ b/src/datadog/tracer_telemetry.cpp @@ -93,7 +93,6 @@ std::string TracerTelemetry::app_started(nlohmann::json&& tracer_config) { })})}, }) .dump(); - return payload; } @@ -177,5 +176,72 @@ std::string TracerTelemetry::heartbeat_and_telemetry() { return payload; } +std::string TracerTelemetry::app_closing() { + time_t tracer_time = std::chrono::duration_cast( + clock_().wall.time_since_epoch()) + .count(); + auto batch_payloads = nlohmann::json::array(); + + auto app_closing = nlohmann::json::object({ + {"request_type", "app-closing"}, + }); + batch_payloads.emplace_back(std::move(app_closing)); + + auto metrics = nlohmann::json::array(); + for (auto& m : metrics_snapshots_) { + auto& metric = m.first.get(); + auto& points = m.second; + if (!points.empty()) { + metrics.emplace_back(nlohmann::json::object({ + {"metric", metric.name()}, + {"tags", metric.tags()}, + {"type", metric.type()}, + {"interval", 60}, + {"points", points}, + {"common", metric.common()}, + })); + } + points.clear(); + } + + if 
(!metrics.empty()) { + auto generate_metrics = nlohmann::json::object({ + {"request_type", "generate-metrics"}, + {"payload", nlohmann::json::object({ + {"namespace", "tracers"}, + {"series", metrics}, + })}, + }); + batch_payloads.emplace_back(std::move(generate_metrics)); + } + + seq_id++; + auto payload = + nlohmann::json::object( + { + {"api_version", "v2"}, + {"seq_id", seq_id}, + {"request_type", "message-batch"}, + {"tracer_time", tracer_time}, + {"runtime_id", span_defaults_->runtime_id}, + {"debug", debug_}, + {"application", + nlohmann::json::object({ + {"service_name", span_defaults_->service}, + {"env", span_defaults_->environment}, + {"tracer_version", tracer_version_string}, + {"language_name", "cpp"}, + {"language_version", std::to_string(__cplusplus)}, + })}, + // TODO: host information (hostname, os, os_version, kernel, etc) + {"host", nlohmann::json::object({ + {"hostname", hostname_}, + })}, + {"payload", batch_payloads}, + }) + .dump(); + return payload; +} + } // namespace tracing } // namespace datadog diff --git a/src/datadog/tracer_telemetry.h b/src/datadog/tracer_telemetry.h index 711c7c19..f5748428 100644 --- a/src/datadog/tracer_telemetry.h +++ b/src/datadog/tracer_telemetry.h @@ -73,6 +73,7 @@ class TracerTelemetry { std::string app_started(nlohmann::json&& tracer_config); void capture_metrics(); std::string heartbeat_and_telemetry(); + std::string app_closing(); }; } // namespace tracing From 16d532b37b86cca603d42913af4c5f372717e2d6 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Tue, 10 Oct 2023 03:04:30 +0000 Subject: [PATCH 17/39] Temporary fix for datadog agent tests --- test/mocks/http_clients.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/mocks/http_clients.h b/test/mocks/http_clients.h index 4170ac03..c30c0cb6 100644 --- a/test/mocks/http_clients.h +++ b/test/mocks/http_clients.h @@ -39,9 +39,12 @@ struct MockHTTPClient : public HTTPClient { ResponseHandler on_response_; ErrorHandler on_error_; - 
Expected post(const URL&, HeadersSetter set_headers, + Expected post(const URL& url, HeadersSetter set_headers, std::string /*body*/, ResponseHandler on_response, ErrorHandler on_error) override { + if (url.path != "/v0.4/traces") { + return {}; + } std::lock_guard lock{mutex_}; if (!post_error) { on_response_ = on_response; From 9811f8c447ca57e20333d220962b0ea1c137043e Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Tue, 10 Oct 2023 04:48:10 +0000 Subject: [PATCH 18/39] Unit tests --- test/CMakeLists.txt | 2 + test/test_metrics.cpp | 35 +++++++++++++++ test/test_tracer_telemetry.cpp | 79 ++++++++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+) create mode 100644 test/test_metrics.cpp create mode 100644 test/test_tracer_telemetry.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 76cc09dd..69771cd2 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -22,6 +22,7 @@ add_executable(tests test_datadog_agent.cpp test_glob.cpp test_limiter.cpp + test_metrics.cpp test_msgpack.cpp test_parse_util.cpp test_smoke.cpp @@ -30,6 +31,7 @@ add_executable(tests test_trace_id.cpp test_trace_segment.cpp test_tracer_config.cpp + test_tracer_telemetry.cpp test_tracer.cpp test_trace_sampler.cpp ) diff --git a/test/test_metrics.cpp b/test/test_metrics.cpp new file mode 100644 index 00000000..d8354e73 --- /dev/null +++ b/test/test_metrics.cpp @@ -0,0 +1,35 @@ +// This test covers operations defined for metrics defined in `metrics.h`. 
+ +#include + +#include "test.h" + +using namespace datadog::tracing; + +TEST_CASE("Counter metrics") { + CounterMetric metric = {"test.counter.metric", {"testing-testing:123"}, true}; + + metric.inc(); + metric.add(41); + REQUIRE(metric.value() == 42); + auto captured_value = metric.capture_and_reset_value(); + REQUIRE(captured_value == 42); + REQUIRE(metric.value() == 0); +} + +TEST_CASE("Gauge metrics") { + GaugeMetric metric = {"test.gauge.metric", {"testing-testing:123"}, true}; + + metric.inc(); + metric.add(50); + metric.sub(8); + metric.dec(); + REQUIRE(metric.value() == 42); + auto captured_value = metric.capture_and_reset_value(); + REQUIRE(captured_value == 42); + REQUIRE(metric.value() == 0); + + metric.add(10); + metric.sub(11); + REQUIRE(metric.value() == 0); +} diff --git a/test/test_tracer_telemetry.cpp b/test/test_tracer_telemetry.cpp new file mode 100644 index 00000000..cb07d72d --- /dev/null +++ b/test/test_tracer_telemetry.cpp @@ -0,0 +1,79 @@ +// These are tests for `TracerTelemetry`. TracerTelemetry is used to measure +// activity in other parts of the tracer implementation, and construct messages +// that are sent to the datadog agent. 
+ +#include +#include + +#include + +#include "mocks/loggers.h" +#include "test.h" + +using namespace datadog::tracing; + +TEST_CASE("Tracer telemetry") { + const std::time_t mock_time = 1672484400; + const Clock clock = [mock_time]() { + TimePoint result; + result.wall = std::chrono::system_clock::from_time_t(mock_time); + return result; + }; + auto logger = std::make_shared(); + auto span_defaults = std::make_shared(); + span_defaults->service = "testsvc"; + span_defaults->environment = "test"; + TracerTelemetry tracer_telemetry = {true, clock, logger, span_defaults}; + + SECTION("generates app-started message") { + auto app_started_message = + tracer_telemetry.app_started(nlohmann::json::object()); + auto app_started = nlohmann::json::parse(app_started_message); + REQUIRE(app_started["request_type"] == "app-started"); + } + + SECTION("generates a heartbeat message") { + auto heartbeat_message = tracer_telemetry.heartbeat_and_telemetry(); + auto message_batch = nlohmann::json::parse(heartbeat_message); + REQUIRE(message_batch["payload"].size() == 1); + auto heartbeat = message_batch["payload"][0]; + REQUIRE(heartbeat["request_type"] == "app-heartbeat"); + } + + SECTION("captures metrics and sends generate-metrics payload") { + tracer_telemetry.metrics().tracer.trace_segments_created_new.inc(); + REQUIRE( + tracer_telemetry.metrics().tracer.trace_segments_created_new.value() == + 1); + tracer_telemetry.capture_metrics(); + REQUIRE( + tracer_telemetry.metrics().tracer.trace_segments_created_new.value() == + 0); + auto heartbeat_and_telemetry_message = + tracer_telemetry.heartbeat_and_telemetry(); + auto message_batch = nlohmann::json::parse(heartbeat_and_telemetry_message); + REQUIRE(message_batch["payload"].size() == 2); + auto generate_metrics = message_batch["payload"][1]; + REQUIRE(generate_metrics["request_type"] == "generate-metrics"); + auto payload = generate_metrics["payload"]; + auto series = payload["series"]; + REQUIRE(series.size() == 1); + auto metric 
= series[0]; + REQUIRE(metric["metric"] == "trace_segments_created"); + auto tags = metric["tags"]; + REQUIRE(tags.size() == 1); + REQUIRE(tags[0] == "new_continued:new"); + auto points = metric["points"]; + REQUIRE(points.size() == 1); + REQUIRE(points[0][0] == mock_time); + REQUIRE(points[0][1] == 1); + } + + SECTION("generates an app-closing event") { + auto app_closing_message = tracer_telemetry.app_closing(); + auto message_batch = nlohmann::json::parse(app_closing_message); + REQUIRE(message_batch["payload"].size() == 1); + auto heartbeat = message_batch["payload"][0]; + REQUIRE(heartbeat["request_type"] == "app-closing"); + } +} From dc94da4b59031856f21dc18e46594a325ae2865d Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Tue, 10 Oct 2023 05:58:36 +0000 Subject: [PATCH 19/39] CI reported this --- src/datadog/tracer_telemetry.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datadog/tracer_telemetry.h b/src/datadog/tracer_telemetry.h index f5748428..ed4a3310 100644 --- a/src/datadog/tracer_telemetry.h +++ b/src/datadog/tracer_telemetry.h @@ -10,7 +10,7 @@ namespace datadog { namespace tracing { class Logger; -class SpanDefaults; +struct SpanDefaults; class TracerTelemetry { bool enabled_ = false; From 1603c0e0a2f84910df8ef43f72858305bae4b344 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Tue, 10 Oct 2023 07:31:48 +0000 Subject: [PATCH 20/39] Additional comments --- src/datadog/tracer_telemetry.h | 42 +++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/src/datadog/tracer_telemetry.h b/src/datadog/tracer_telemetry.h index ed4a3310..d1ce5158 100644 --- a/src/datadog/tracer_telemetry.h +++ b/src/datadog/tracer_telemetry.h @@ -1,5 +1,30 @@ #pragma once -#include + +// This component provides a class, TracerTelemetry, that is used to collect +// data from the activity of the tracer implementation, and encode messages that +// can be submitted to the Datadog Agent. 
+// +// Counter metrics are updated in other parts of the tracers, with the values +// being managed by this class. +// +// The messages that TracerTelemetry produces are +// - `app-started` +// - `message-batch` +// - `app-heartbeat` +// - `generate-metrics` +// - `app-closing` +// +// `app-started` messages are sent as part of initializing the tracer. +// +// At 60 second intervals, a `message-batch` message is sent containing an +// `app-heartbeat` message, and if metrics have changed during that interval, a +// `generate-metrics` message is also included in the batch. +// +// `app-closing` messages are sent as part of terminating the tracer. These are +// sent as a `message-batch` message , and if metrics have changed since the +// last `app-heartbeat` event, a `generate-metrics` message is also included in +// the batch. +// #include #include "clock.h" @@ -20,12 +45,16 @@ class TracerTelemetry { std::shared_ptr span_defaults_; std::string hostname_; uint64_t seq_id = 0; + // Each metric has an associated MetricSnapshot that contains the data points, + // represented as a timestamp and the value of that metric. using MetricSnapshot = std::vector>; // This uses a reference_wrapper so references to internal metric values can // be captured, and be iterated trivially when the values need to be // snapshotted and published in telemetry messages. std::vector, MetricSnapshot>> metrics_snapshots_; + // This structure contains all the metrics that are exposed by tracer + // telemetry. struct { struct { CounterMetric spans_created = { @@ -69,10 +98,21 @@ class TracerTelemetry { const std::shared_ptr& logger, const std::shared_ptr& span_defaults); bool enabled() { return enabled_; }; + // Provides access to the telemetry metrics for updating the values. + // This value should not be stored. auto& metrics() { return metrics_; }; + // Constructs an `app-started` message using information provided when + // constructed and the tracer_config value passed in. 
std::string app_started(nlohmann::json&& tracer_config); + // This is used to take a snapshot of the current state of metrics and collect + // timestamped "points" of values. These values are later submitted in + // `generate-metrics` messages. void capture_metrics(); + // Constructs a messsage-batch containing `app-heartbeat`, and if metrics have + // been modified, a `generate-metrics` message. std::string heartbeat_and_telemetry(); + // Constructs a message-batch containing `app-closing`, and if metrics have + // been modified, a `generate-metrics` message. std::string app_closing(); }; From 09fc24d074013b4668acbe84101b097faaf3bf9f Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Tue, 10 Oct 2023 08:57:04 +0000 Subject: [PATCH 21/39] Additional comments --- src/datadog/datadog_agent.cpp | 6 ++++++ src/datadog/tracer_telemetry.cpp | 3 +++ 2 files changed, 9 insertions(+) diff --git a/src/datadog/datadog_agent.cpp b/src/datadog/datadog_agent.cpp index 5cf68b9f..41e36d5d 100644 --- a/src/datadog/datadog_agent.cpp +++ b/src/datadog/datadog_agent.cpp @@ -148,6 +148,9 @@ DatadogAgent::DatadogAgent( assert(logger_); assert(tracer_telemetry_); if (tracer_telemetry_->enabled()) { + // Only schedule this if telemetry is enabled. + // Every 10 seconds, have the tracer telemetry capture the metrics values. + // Every 60 seconds, also report those values to the datadog agent. cancel_telemetry_timer_ = event_scheduler_->schedule_recurring_event( std::chrono::seconds(10), [this, n = 0]() mutable { n++; @@ -164,8 +167,11 @@ DatadogAgent::~DatadogAgent() { cancel_scheduled_flush_(); flush(); if (tracer_telemetry_->enabled()) { + // This action only needs to occur if tracer telemetry is enabled. cancel_telemetry_timer_(); tracer_telemetry_->capture_metrics(); + // The app-closing message is bundled with a message containing the final + // metric values. 
send_app_closing(); } http_client_->drain(deadline); diff --git a/src/datadog/tracer_telemetry.cpp b/src/datadog/tracer_telemetry.cpp index 46478cb3..01e5a3d7 100644 --- a/src/datadog/tracer_telemetry.cpp +++ b/src/datadog/tracer_telemetry.cpp @@ -18,6 +18,9 @@ TracerTelemetry::TracerTelemetry( span_defaults_(span_defaults), hostname_(get_hostname().value_or("hostname-unavailable")) { if (enabled_) { + // Register all the metrics that we're tracking by adding them to the + // metrics_snapshots_ container. This allows for simpler iteration logic + // when using the values in `generate-metrics` messages. metrics_snapshots_.emplace_back(metrics_.tracer.spans_created, MetricSnapshot{}); metrics_snapshots_.emplace_back(metrics_.tracer.spans_finished, From 28cc72448ba05c757e55ca1f305bdfe150ddcd79 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Wed, 11 Oct 2023 00:08:56 +0000 Subject: [PATCH 22/39] Refactor repeated telemetry body construction --- src/datadog/tracer_telemetry.cpp | 145 ++++++++++--------------------- src/datadog/tracer_telemetry.h | 6 +- 2 files changed, 50 insertions(+), 101 deletions(-) diff --git a/src/datadog/tracer_telemetry.cpp b/src/datadog/tracer_telemetry.cpp index 01e5a3d7..dd3e9bd3 100644 --- a/src/datadog/tracer_telemetry.cpp +++ b/src/datadog/tracer_telemetry.cpp @@ -1,6 +1,5 @@ #include "tracer_telemetry.h" -#include "json.hpp" #include "logger.h" #include "platform_util.h" #include "span_defaults.h" @@ -52,51 +51,49 @@ TracerTelemetry::TracerTelemetry( } } -std::string TracerTelemetry::app_started(nlohmann::json&& tracer_config) { +nlohmann::json TracerTelemetry::generate_telemetry_body( + std::string request_type) { time_t tracer_time = std::chrono::duration_cast( clock_().wall.time_since_epoch()) .count(); - - seq_id++; - auto payload = - nlohmann::json::object( - { - {"api_version", "v2"}, - {"seq_id", seq_id}, - {"request_type", "app-started"}, - {"tracer_time", tracer_time}, - {"runtime_id", span_defaults_->runtime_id}, - {"debug", 
debug_}, - {"application", - nlohmann::json::object({ - {"service_name", span_defaults_->service}, - {"env", span_defaults_->environment}, - {"tracer_version", tracer_version_string}, - {"language_name", "cpp"}, - {"language_version", std::to_string(__cplusplus)}, + seq_id_++; + return nlohmann::json::object({ + {"api_version", "v2"}, + {"seq_id", seq_id_}, + {"request_type", request_type}, + {"tracer_time", tracer_time}, + {"runtime_id", span_defaults_->runtime_id}, + {"debug", debug_}, + {"application", nlohmann::json::object({ + {"service_name", span_defaults_->service}, + {"env", span_defaults_->environment}, + {"tracer_version", tracer_version_string}, + {"language_name", "cpp"}, + {"language_version", std::to_string(__cplusplus)}, + })}, + // TODO: host information (os, os_version, kernel, etc) + {"host", nlohmann::json::object({ + {"hostname", hostname_}, })}, - // TODO: host information (os, os_version, kernel, etc) - {"host", nlohmann::json::object({ - {"hostname", hostname_}, - })}, - {"payload", - nlohmann::json::object({ - {"configuration", nlohmann::json::array({ - // TODO: environment variables or - // finalized config details - })}, + }); +} - })}, - // TODO: Until we figure out "configuration", above, include a - // JSON dump of the tracer configuration as "additional_payload". - {"additional_payload", - nlohmann::json::array({nlohmann::json::object({ - {"name", "tracer_config_json"}, - {"value", tracer_config.dump()}, - })})}, - }) - .dump(); - return payload; +std::string TracerTelemetry::app_started(nlohmann::json&& tracer_config) { + auto telemetry_body = generate_telemetry_body("app-started"); + // TODO: environment variables or finalized config details + telemetry_body["payload"] = nlohmann::json::object({ + {"configuration", nlohmann::json::array({})}, + + }); + // TODO: Until we figure out "configuration", above, include a + // JSON dump of the tracer configuration as "additional_payload". 
+ telemetry_body["additional_payload"] = + nlohmann::json::array({nlohmann::json::object({ + {"name", "tracer_config_json"}, + {"value", tracer_config.dump()}, + })}); + auto app_started_payload = telemetry_body.dump(); + return app_started_payload; } void TracerTelemetry::capture_metrics() { @@ -113,9 +110,6 @@ void TracerTelemetry::capture_metrics() { } std::string TracerTelemetry::heartbeat_and_telemetry() { - time_t tracer_time = std::chrono::duration_cast( - clock_().wall.time_since_epoch()) - .count(); auto batch_payloads = nlohmann::json::array(); auto heartbeat = nlohmann::json::object({ @@ -151,38 +145,13 @@ std::string TracerTelemetry::heartbeat_and_telemetry() { batch_payloads.emplace_back(std::move(generate_metrics)); } - seq_id++; - auto payload = - nlohmann::json::object( - { - {"api_version", "v2"}, - {"seq_id", seq_id}, - {"request_type", "message-batch"}, - {"tracer_time", tracer_time}, - {"runtime_id", span_defaults_->runtime_id}, - {"debug", debug_}, - {"application", - nlohmann::json::object({ - {"service_name", span_defaults_->service}, - {"env", span_defaults_->environment}, - {"tracer_version", tracer_version_string}, - {"language_name", "cpp"}, - {"language_version", std::to_string(__cplusplus)}, - })}, - // TODO: host information (hostname, os, os_version, kernel, etc) - {"host", nlohmann::json::object({ - {"hostname", hostname_}, - })}, - {"payload", batch_payloads}, - }) - .dump(); - return payload; + auto telemetry_body = generate_telemetry_body("message-batch"); + telemetry_body["payload"] = batch_payloads; + auto message_batch_payload = telemetry_body.dump(); + return message_batch_payload; } std::string TracerTelemetry::app_closing() { - time_t tracer_time = std::chrono::duration_cast( - clock_().wall.time_since_epoch()) - .count(); auto batch_payloads = nlohmann::json::array(); auto app_closing = nlohmann::json::object({ @@ -218,32 +187,10 @@ std::string TracerTelemetry::app_closing() { 
batch_payloads.emplace_back(std::move(generate_metrics)); } - seq_id++; - auto payload = - nlohmann::json::object( - { - {"api_version", "v2"}, - {"seq_id", seq_id}, - {"request_type", "message-batch"}, - {"tracer_time", tracer_time}, - {"runtime_id", span_defaults_->runtime_id}, - {"debug", debug_}, - {"application", - nlohmann::json::object({ - {"service_name", span_defaults_->service}, - {"env", span_defaults_->environment}, - {"tracer_version", tracer_version_string}, - {"language_name", "cpp"}, - {"language_version", std::to_string(__cplusplus)}, - })}, - // TODO: host information (hostname, os, os_version, kernel, etc) - {"host", nlohmann::json::object({ - {"hostname", hostname_}, - })}, - {"payload", batch_payloads}, - }) - .dump(); - return payload; + auto telemetry_body = generate_telemetry_body("message-batch"); + telemetry_body["payload"] = batch_payloads; + auto message_batch_payload = telemetry_body.dump(); + return message_batch_payload; } } // namespace tracing diff --git a/src/datadog/tracer_telemetry.h b/src/datadog/tracer_telemetry.h index d1ce5158..74dc918d 100644 --- a/src/datadog/tracer_telemetry.h +++ b/src/datadog/tracer_telemetry.h @@ -28,7 +28,7 @@ #include #include "clock.h" -#include "json_fwd.hpp" +#include "json.hpp" #include "metrics.h" namespace datadog { @@ -44,7 +44,7 @@ class TracerTelemetry { std::shared_ptr logger_; std::shared_ptr span_defaults_; std::string hostname_; - uint64_t seq_id = 0; + uint64_t seq_id_ = 0; // Each metric has an associated MetricSnapshot that contains the data points, // represented as a timestamp and the value of that metric. 
using MetricSnapshot = std::vector>; @@ -93,6 +93,8 @@ class TracerTelemetry { } trace_api; } metrics_; + nlohmann::json generate_telemetry_body(std::string request_type); + public: TracerTelemetry(bool enabled, const Clock& clock, const std::shared_ptr& logger, From 31d88a3ec44ea4fa9b2308595c2cfc0217445b19 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Wed, 11 Oct 2023 01:39:31 +0000 Subject: [PATCH 23/39] Refactor http client callbacks --- src/datadog/datadog_agent.cpp | 103 +++++++++------------------------- src/datadog/datadog_agent.h | 4 ++ 2 files changed, 32 insertions(+), 75 deletions(-) diff --git a/src/datadog/datadog_agent.cpp b/src/datadog/datadog_agent.cpp index 41e36d5d..20169df6 100644 --- a/src/datadog/datadog_agent.cpp +++ b/src/datadog/datadog_agent.cpp @@ -159,6 +159,28 @@ DatadogAgent::DatadogAgent( send_heartbeat_and_telemetry(); } }); + // Callback for setting telemetry request headers. + telemetry_set_request_headers_ = [](DictWriter& headers) { + headers.set("Content-Type", "application/json"); + }; + // Callback for successful telemetry HTTP requests, to examine HTTP status. + telemetry_on_response_ = [logger = logger_]( + int response_status, + const DictReader& /*response_headers*/, + std::string response_body) { + if (response_status < 200 || response_status >= 300) { + logger->log_error([&](auto& stream) { + stream << "Unexpected telemetry response status " << response_status + << " with body (starts on next line):\n" + << response_body; + }); + } + }; + // Callback for unsuccessful telemetry HTTP requests. 
+ telemetry_on_error_ = [logger = logger_](Error error) { + logger->log_error(error.with_prefix( + "Error occurred during HTTP request for telemetry: ")); + }; } } @@ -310,32 +332,9 @@ void DatadogAgent::flush() { void DatadogAgent::send_app_started(nlohmann::json&& tracer_config) { auto payload = tracer_telemetry_->app_started(std::move(tracer_config)); - auto set_request_headers = [&](DictWriter& headers) { - headers.set("Content-Type", "application/json"); - }; - - // Callback for a successful HTTP request, to examine HTTP status. - auto on_response = [logger = logger_](int response_status, - const DictReader& /*response_headers*/, - std::string response_body) { - if (response_status < 200 || response_status >= 300) { - logger->log_error([&](auto& stream) { - stream << "Unexpected telemetry response status " << response_status - << " with body (starts on next line):\n" - << response_body; - }); - } - }; - - // Callback for unsuccessful HTTP request. - auto on_error = [logger = logger_](Error error) { - logger->log_error(error.with_prefix( - "Error occurred during HTTP request for telemetry: ")); - }; - auto post_result = http_client_->post( - telemetry_endpoint_, std::move(set_request_headers), std::move(payload), - std::move(on_response), std::move(on_error)); + telemetry_endpoint_, telemetry_set_request_headers_, std::move(payload), + telemetry_on_response_, telemetry_on_error_); if (auto* error = post_result.if_error()) { logger_->log_error(*error); } @@ -343,32 +342,9 @@ void DatadogAgent::send_app_started(nlohmann::json&& tracer_config) { void DatadogAgent::send_heartbeat_and_telemetry() { auto payload = tracer_telemetry_->heartbeat_and_telemetry(); - auto set_request_headers = [&](DictWriter& headers) { - headers.set("Content-Type", "application/json"); - }; - - // Callback for a successful HTTP request, to examine HTTP status. 
- auto on_response = [logger = logger_](int response_status, - const DictReader& /*response_headers*/, - std::string response_body) { - if (response_status < 200 || response_status >= 300) { - logger->log_error([&](auto& stream) { - stream << "Unexpected telemetry response status " << response_status - << " with body (starts on next line):\n" - << response_body; - }); - } - }; - - // Callback for unsuccessful HTTP request. - auto on_error = [logger = logger_](Error error) { - logger->log_error(error.with_prefix( - "Error occurred during HTTP request for telemetry: ")); - }; - auto post_result = http_client_->post( - telemetry_endpoint_, std::move(set_request_headers), std::move(payload), - std::move(on_response), std::move(on_error)); + telemetry_endpoint_, telemetry_set_request_headers_, std::move(payload), + telemetry_on_response_, telemetry_on_error_); if (auto* error = post_result.if_error()) { logger_->log_error(*error); } @@ -376,32 +352,9 @@ void DatadogAgent::send_heartbeat_and_telemetry() { void DatadogAgent::send_app_closing() { auto payload = tracer_telemetry_->app_closing(); - auto set_request_headers = [&](DictWriter& headers) { - headers.set("Content-Type", "application/json"); - }; - - // Callback for a successful HTTP request, to examine HTTP status. - auto on_response = [logger = logger_](int response_status, - const DictReader& /*response_headers*/, - std::string response_body) { - if (response_status < 200 || response_status >= 300) { - logger->log_error([&](auto& stream) { - stream << "Unexpected telemetry response status " << response_status - << " with body (starts on next line):\n" - << response_body; - }); - } - }; - - // Callback for unsuccessful HTTP request. 
- auto on_error = [logger = logger_](Error error) { - logger->log_error(error.with_prefix( - "Error occurred during HTTP request for telemetry: ")); - }; - auto post_result = http_client_->post( - telemetry_endpoint_, std::move(set_request_headers), std::move(payload), - std::move(on_response), std::move(on_error)); + telemetry_endpoint_, telemetry_set_request_headers_, std::move(payload), + telemetry_on_response_, telemetry_on_error_); if (auto* error = post_result.if_error()) { logger_->log_error(*error); } diff --git a/src/datadog/datadog_agent.h b/src/datadog/datadog_agent.h index 364d6949..77ed5566 100644 --- a/src/datadog/datadog_agent.h +++ b/src/datadog/datadog_agent.h @@ -45,6 +45,10 @@ class DatadogAgent : public Collector { EventScheduler::Cancel cancel_scheduled_flush_; EventScheduler::Cancel cancel_telemetry_timer_; std::chrono::steady_clock::duration flush_interval_; + // Callbacks for submitting telemetry data + HTTPClient::HeadersSetter telemetry_set_request_headers_; + HTTPClient::ResponseHandler telemetry_on_response_; + HTTPClient::ErrorHandler telemetry_on_error_; void flush(); void send_heartbeat_and_telemetry(); From 879cfe34151557a903a083e2fef0003eea8ffe3b Mon Sep 17 00:00:00 2001 From: David Goffredo Date: Thu, 12 Oct 2023 14:32:11 -0400 Subject: [PATCH 24/39] separate runtime_id from class SpanDefaults, and introduce class RuntimeID --- BUILD.bazel | 2 ++ CMakeLists.txt | 14 ++++++++------ src/datadog/runtime_id.cpp | 17 +++++++++++++++++ src/datadog/runtime_id.h | 24 ++++++++++++++++++++++++ src/datadog/span_defaults.cpp | 1 - src/datadog/span_defaults.h | 2 -- src/datadog/trace_segment.cpp | 4 +++- src/datadog/trace_segment.h | 2 ++ src/datadog/tracer.cpp | 20 ++++++++++++-------- src/datadog/tracer.h | 1 + src/datadog/tracer_config.cpp | 2 ++ src/datadog/tracer_config.h | 8 +++++--- src/datadog/tracer_telemetry.cpp | 6 ++++-- src/datadog/tracer_telemetry.h | 5 ++++- test/test_tracer_telemetry.cpp | 3 ++- 15 files changed, 86 
insertions(+), 25 deletions(-) create mode 100644 src/datadog/runtime_id.cpp create mode 100644 src/datadog/runtime_id.h diff --git a/BUILD.bazel b/BUILD.bazel index 56f256fb..6f039007 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -23,6 +23,7 @@ cc_library( "src/datadog/propagation_style.cpp", "src/datadog/random.cpp", "src/datadog/rate.cpp", + "src/datadog/runtime_id.cpp", "src/datadog/span.cpp", "src/datadog/span_data.cpp", "src/datadog/span_defaults.cpp", @@ -75,6 +76,7 @@ cc_library( "src/datadog/propagation_style.h", "src/datadog/random.h", "src/datadog/rate.h", + "src/datadog/runtime_id.h", "src/datadog/sampling_decision.h", "src/datadog/sampling_mechanism.h", "src/datadog/sampling_priority.h", diff --git a/CMakeLists.txt b/CMakeLists.txt index f97a22d8..a0336c13 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -118,6 +118,7 @@ target_sources(dd_trace_cpp-objects PRIVATE src/datadog/propagation_style.cpp src/datadog/random.cpp src/datadog/rate.cpp + src/datadog/runtime_id.cpp src/datadog/span.cpp src/datadog/span_data.cpp src/datadog/span_defaults.cpp @@ -176,6 +177,7 @@ target_sources(dd_trace_cpp-objects PUBLIC src/datadog/propagation_style.h src/datadog/random.h src/datadog/rate.h + src/datadog/runtime_id.h src/datadog/sampling_decision.h src/datadog/sampling_mechanism.h src/datadog/sampling_priority.h @@ -209,12 +211,12 @@ include_directories(${CMAKE_BINARY_DIR}/include) # Linking this library requires libcurl and threads. 
find_package(Threads REQUIRED) -target_link_libraries(dd_trace_cpp-objects - PUBLIC - ${CMAKE_BINARY_DIR}/lib/libcurl.a - PUBLIC - Threads::Threads - ${COVERAGE_LIBRARIES} +target_link_libraries(dd_trace_cpp-objects + PUBLIC + ${CMAKE_BINARY_DIR}/lib/libcurl.a + PUBLIC + Threads::Threads + ${COVERAGE_LIBRARIES} ${COREFOUNDATION_LIBRARY} ${SYSTEMCONFIGURATION_LIBRARY} ) diff --git a/src/datadog/runtime_id.cpp b/src/datadog/runtime_id.cpp new file mode 100644 index 00000000..22671690 --- /dev/null +++ b/src/datadog/runtime_id.cpp @@ -0,0 +1,17 @@ +#include "runtime_id.h" + +#include "random.h" + +namespace datadog { +namespace tracing { + +RuntimeID::RuntimeID() {} + +RuntimeID RuntimeID::generate() { + RuntimeID id; + id.uuid_ = uuid(); + return id; +} + +} // namespace tracing +} // namespace datadog diff --git a/src/datadog/runtime_id.h b/src/datadog/runtime_id.h new file mode 100644 index 00000000..6d5b4cb0 --- /dev/null +++ b/src/datadog/runtime_id.h @@ -0,0 +1,24 @@ +#pragma once + +// TODO: document + +#include + +namespace datadog { +namespace tracing { + +class RuntimeID { + std::string uuid_; + RuntimeID(); + + public: + // Return the canonical textual representation of this ID. + const std::string& string() const { return uuid_; } + + // Return a pseudo-randomly generated runtime ID. The underlying generator is + // `random_uint64()` declared in `random.h`. 
+ static RuntimeID generate(); +}; + +} // namespace tracing +} // namespace datadog diff --git a/src/datadog/span_defaults.cpp b/src/datadog/span_defaults.cpp index 44ba6306..387eaf54 100644 --- a/src/datadog/span_defaults.cpp +++ b/src/datadog/span_defaults.cpp @@ -22,7 +22,6 @@ nlohmann::json to_json(const SpanDefaults& defaults) { TO_JSON(version); TO_JSON(name); TO_JSON(tags); - TO_JSON(runtime_id); #undef TO_JSON return result; } diff --git a/src/datadog/span_defaults.h b/src/datadog/span_defaults.h index 0cf81a00..1c9b4fd2 100644 --- a/src/datadog/span_defaults.h +++ b/src/datadog/span_defaults.h @@ -8,7 +8,6 @@ #include #include "json_fwd.hpp" -#include "random.h" namespace datadog { namespace tracing { @@ -20,7 +19,6 @@ struct SpanDefaults { std::string version = ""; std::string name = ""; std::unordered_map tags; - std::string runtime_id = uuid(); }; nlohmann::json to_json(const SpanDefaults&); diff --git a/src/datadog/trace_segment.cpp b/src/datadog/trace_segment.cpp index d476a3fb..5635050c 100644 --- a/src/datadog/trace_segment.cpp +++ b/src/datadog/trace_segment.cpp @@ -83,6 +83,7 @@ TraceSegment::TraceSegment( const std::shared_ptr& trace_sampler, const std::shared_ptr& span_sampler, const std::shared_ptr& defaults, + const RuntimeID& runtime_id, const std::vector& injection_styles, const Optional& hostname, Optional origin, std::size_t tags_header_max_size, @@ -97,6 +98,7 @@ TraceSegment::TraceSegment( trace_sampler_(trace_sampler), span_sampler_(span_sampler), defaults_(defaults), + runtime_id_(runtime_id), injection_styles_(injection_styles), hostname_(hostname), origin_(std::move(origin)), @@ -214,7 +216,7 @@ void TraceSegment::span_finished() { } span.numeric_tags[tags::internal::process_id] = Cache::process_id; span.tags[tags::internal::language] = "cpp"; - span.tags[tags::internal::runtime_id] = defaults_->runtime_id; + span.tags[tags::internal::runtime_id] = runtime_id_.string(); } const auto result = collector_->send(std::move(spans_), 
trace_sampler_); diff --git a/src/datadog/trace_segment.h b/src/datadog/trace_segment.h index d96bef22..530ef139 100644 --- a/src/datadog/trace_segment.h +++ b/src/datadog/trace_segment.h @@ -61,6 +61,7 @@ class TraceSegment { std::shared_ptr span_sampler_; std::shared_ptr defaults_; + RuntimeID runtime_id_; const std::vector injection_styles_; const Optional hostname_; const Optional origin_; @@ -81,6 +82,7 @@ class TraceSegment { const std::shared_ptr& trace_sampler, const std::shared_ptr& span_sampler, const std::shared_ptr& defaults, + const RuntimeID& runtime_id, const std::vector& injection_styles, const Optional& hostname, Optional origin, std::size_t tags_header_max_size, diff --git a/src/datadog/tracer.cpp b/src/datadog/tracer.cpp index 3da0df4d..077454f7 100644 --- a/src/datadog/tracer.cpp +++ b/src/datadog/tracer.cpp @@ -199,8 +199,8 @@ Expected extract_b3( nlohmann::json make_config_json( StringView tracer_version_string, const Collector& collector, - const SpanDefaults& defaults, const TraceSampler& trace_sampler, - const SpanSampler& span_sampler, + const SpanDefaults& defaults, const RuntimeID& runtime_id, + const TraceSampler& trace_sampler, const SpanSampler& span_sampler, const std::vector& injection_styles, const std::vector& extraction_styles, const Optional& hostname, std::size_t tags_header_max_size) { @@ -208,6 +208,7 @@ nlohmann::json make_config_json( auto config = nlohmann::json::object({ {"version", tracer_version_string}, {"defaults", to_json(defaults)}, + {"runtime_id", runtime_id.string()}, {"collector", collector.config_json()}, {"trace_sampler", trace_sampler.config_json()}, {"span_sampler", span_sampler.config_json()}, @@ -244,8 +245,10 @@ Tracer::Tracer(const FinalizedTracerConfig& config, : logger_(config.logger), collector_(/* see constructor body */), defaults_(std::make_shared(config.defaults)), + runtime_id_(config.runtime_id ? 
*config.runtime_id + : RuntimeID::generate()), tracer_telemetry_(std::make_shared( - config.report_telemetry, clock, logger_, defaults_)), + config.report_telemetry, clock, logger_, defaults_, runtime_id_)), trace_sampler_( std::make_shared(config.trace_sampler, clock)), span_sampler_(std::make_shared(config.span_sampler, clock)), @@ -278,8 +281,9 @@ Tracer::Tracer(const FinalizedTracerConfig& config, nlohmann::json Tracer::config_json() const { return make_config_json(tracer_version_string, *collector_, *defaults_, - *trace_sampler_, *span_sampler_, injection_styles_, - extraction_styles_, hostname_, tags_header_max_size_); + runtime_id_, *trace_sampler_, *span_sampler_, + injection_styles_, extraction_styles_, hostname_, + tags_header_max_size_); } Span Tracer::create_span() { return create_span(SpanConfig{}); } @@ -300,8 +304,8 @@ Span Tracer::create_span(const SpanConfig& config) { tracer_telemetry_->metrics().tracer.trace_segments_created_new.inc(); const auto segment = std::make_shared( logger_, collector_, tracer_telemetry_, trace_sampler_, span_sampler_, - defaults_, injection_styles_, hostname_, nullopt /* origin */, - tags_header_max_size_, std::move(trace_tags), + defaults_, runtime_id_, injection_styles_, hostname_, + nullopt /* origin */, tags_header_max_size_, std::move(trace_tags), nullopt /* sampling_decision */, nullopt /* additional_w3c_tracestate */, nullopt /* additional_datadog_w3c_tracestate*/, std::move(span_data)); Span span{span_data_ptr, segment, @@ -464,7 +468,7 @@ Expected Tracer::extract_span(const DictReader& reader, tracer_telemetry_->metrics().tracer.trace_segments_created_continued.inc(); const auto segment = std::make_shared( logger_, collector_, tracer_telemetry_, trace_sampler_, span_sampler_, - defaults_, injection_styles_, hostname_, std::move(origin), + defaults_, runtime_id_, injection_styles_, hostname_, std::move(origin), tags_header_max_size_, std::move(trace_tags), std::move(sampling_decision), 
std::move(additional_w3c_tracestate), std::move(additional_datadog_w3c_tracestate), std::move(span_data)); diff --git a/src/datadog/tracer.h b/src/datadog/tracer.h index de5742fc..55dd6393 100644 --- a/src/datadog/tracer.h +++ b/src/datadog/tracer.h @@ -32,6 +32,7 @@ class Tracer { std::shared_ptr logger_; std::shared_ptr collector_; std::shared_ptr defaults_; + RuntimeID runtime_id_; std::shared_ptr tracer_telemetry_; std::shared_ptr trace_sampler_; std::shared_ptr span_sampler_; diff --git a/src/datadog/tracer_config.cpp b/src/datadog/tracer_config.cpp index 7dfd6f24..7da564e4 100644 --- a/src/datadog/tracer_config.cpp +++ b/src/datadog/tracer_config.cpp @@ -359,6 +359,8 @@ Expected finalize_config(const TracerConfig &config) { result.trace_id_128_bit = config.trace_id_128_bit; } + result.runtime_id = config.runtime_id; + return result; } diff --git a/src/datadog/tracer_config.h b/src/datadog/tracer_config.h index cf103e5c..a7ddff53 100644 --- a/src/datadog/tracer_config.h +++ b/src/datadog/tracer_config.h @@ -13,7 +13,7 @@ #include "error.h" #include "expected.h" #include "propagation_style.h" -#include "random.h" +#include "runtime_id.h" #include "span_defaults.h" #include "span_sampler_config.h" #include "trace_sampler_config.h" @@ -109,6 +109,9 @@ struct TracerConfig { // tracer will generate 64-bit trace IDs. `trace_id_128_bit` is overridden by // the `DD_TRACE_128_BIT_TRACEID_GENERATION_ENABLED` environment variable. 
bool trace_id_128_bit = false; + + // TODO: document + Optional runtime_id; }; // `FinalizedTracerConfig` contains `Tracer` implementation details derived from @@ -138,8 +141,7 @@ class FinalizedTracerConfig { bool log_on_startup; bool trace_id_128_bit; bool report_telemetry; - - std::string runtime_id = uuid(); + Optional runtime_id; }; // Return a `FinalizedTracerConfig` from the specified `config` and from any diff --git a/src/datadog/tracer_telemetry.cpp b/src/datadog/tracer_telemetry.cpp index dd3e9bd3..21c79f1a 100644 --- a/src/datadog/tracer_telemetry.cpp +++ b/src/datadog/tracer_telemetry.cpp @@ -10,11 +10,13 @@ namespace tracing { TracerTelemetry::TracerTelemetry( bool enabled, const Clock& clock, const std::shared_ptr& logger, - const std::shared_ptr& span_defaults) + const std::shared_ptr& span_defaults, + const RuntimeID& runtime_id) : enabled_(enabled), clock_(clock), logger_(logger), span_defaults_(span_defaults), + runtime_id_(runtime_id), hostname_(get_hostname().value_or("hostname-unavailable")) { if (enabled_) { // Register all the metrics that we're tracking by adding them to the @@ -62,7 +64,7 @@ nlohmann::json TracerTelemetry::generate_telemetry_body( {"seq_id", seq_id_}, {"request_type", request_type}, {"tracer_time", tracer_time}, - {"runtime_id", span_defaults_->runtime_id}, + {"runtime_id", runtime_id_.string()}, {"debug", debug_}, {"application", nlohmann::json::object({ {"service_name", span_defaults_->service}, diff --git a/src/datadog/tracer_telemetry.h b/src/datadog/tracer_telemetry.h index 74dc918d..c1264535 100644 --- a/src/datadog/tracer_telemetry.h +++ b/src/datadog/tracer_telemetry.h @@ -30,6 +30,7 @@ #include "clock.h" #include "json.hpp" #include "metrics.h" +#include "runtime_id.h" namespace datadog { namespace tracing { @@ -43,6 +44,7 @@ class TracerTelemetry { Clock clock_; std::shared_ptr logger_; std::shared_ptr span_defaults_; + RuntimeID runtime_id_; std::string hostname_; uint64_t seq_id_ = 0; // Each metric has an 
associated MetricSnapshot that contains the data points, @@ -98,7 +100,8 @@ class TracerTelemetry { public: TracerTelemetry(bool enabled, const Clock& clock, const std::shared_ptr& logger, - const std::shared_ptr& span_defaults); + const std::shared_ptr& span_defaults, + const RuntimeID& runtime_id); bool enabled() { return enabled_; }; // Provides access to the telemetry metrics for updating the values. // This value should not be stored. diff --git a/test/test_tracer_telemetry.cpp b/test/test_tracer_telemetry.cpp index cb07d72d..430a1860 100644 --- a/test/test_tracer_telemetry.cpp +++ b/test/test_tracer_telemetry.cpp @@ -23,7 +23,8 @@ TEST_CASE("Tracer telemetry") { auto span_defaults = std::make_shared(); span_defaults->service = "testsvc"; span_defaults->environment = "test"; - TracerTelemetry tracer_telemetry = {true, clock, logger, span_defaults}; + TracerTelemetry tracer_telemetry = {true, clock, logger, span_defaults, + RuntimeID::generate()}; SECTION("generates app-started message") { auto app_started_message = From 26a5eac2099dc281f78bf54634e0841eb3ce5163 Mon Sep 17 00:00:00 2001 From: David Goffredo Date: Thu, 12 Oct 2023 14:47:54 -0400 Subject: [PATCH 25/39] doc 'till you drop --- src/datadog/runtime_id.h | 10 +++++++--- src/datadog/tracer_config.h | 6 +++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/datadog/runtime_id.h b/src/datadog/runtime_id.h index 6d5b4cb0..47872770 100644 --- a/src/datadog/runtime_id.h +++ b/src/datadog/runtime_id.h @@ -1,6 +1,10 @@ #pragma once -// TODO: document +// This component provides a `class`, `RuntimeID`, that is a wrapper around an +// RFC 4122 UUIDv4. `RuntimeID` identifies the current run of the application in +// which this tracing library is embedded. +// +// See `TracerConfig::runtime_id`, declared in `tracer_config.h`. #include @@ -12,11 +16,11 @@ class RuntimeID { RuntimeID(); public: - // Return the canonical textual representation of this ID. 
+ // Return the canonical textual representation of this runtime ID. const std::string& string() const { return uuid_; } // Return a pseudo-randomly generated runtime ID. The underlying generator is - // `random_uint64()` declared in `random.h`. + // `random_uint64()`, declared in `random.h`. static RuntimeID generate(); }; diff --git a/src/datadog/tracer_config.h b/src/datadog/tracer_config.h index a7ddff53..5ef23270 100644 --- a/src/datadog/tracer_config.h +++ b/src/datadog/tracer_config.h @@ -110,7 +110,11 @@ struct TracerConfig { // the `DD_TRACE_128_BIT_TRACEID_GENERATION_ENABLED` environment variable. bool trace_id_128_bit = false; - // TODO: document + // `runtime_id` denotes the current run of the application in which the tracer + // is embedded. If `runtime_id` is not specified, then it defaults to a + // pseudo-randomly generated value. A server that contains multiple tracers, + // such as those in the worker threads/processes of a reverse proxy, might + // specify the same `runtime_id` for all tracer instances in the same run. 
Optional runtime_id; }; From 03a9921970bbaebde2976d830682d380aff44182 Mon Sep 17 00:00:00 2001 From: David Goffredo Date: Thu, 12 Oct 2023 15:11:29 -0400 Subject: [PATCH 26/39] don't use the real clock in SpanSampler's limiter test --- test/test_span_sampler.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/test/test_span_sampler.cpp b/test/test_span_sampler.cpp index 995a10b7..a99a029b 100644 --- a/test/test_span_sampler.cpp +++ b/test/test_span_sampler.cpp @@ -307,7 +307,8 @@ TEST_CASE("span rule limiter") { auto finalized = finalize_config(config); REQUIRE(finalized); - Tracer tracer{*finalized}; + auto clock = [frozen_time = default_clock()]() { return frozen_time; }; + Tracer tracer{*finalized, clock}; for (std::size_t i = 0; i < test_case.num_spans; ++i) { auto span = tracer.create_span(); @@ -326,10 +327,5 @@ TEST_CASE("span rule limiter") { } } - // The `TestCase` that expects 100 span allowed once failed because 101 were - // allowed. I'm not sure how that works, but we are using a real clock and - // different machines run these cases at different rates, so let's build in a - // fudge factor. 
- REQUIRE(count_of_sampled_spans >= test_case.expected_count - 10); - REQUIRE(count_of_sampled_spans <= test_case.expected_count + 10); + REQUIRE(count_of_sampled_spans == test_case.expected_count); } From 04e1a173234857a1205c6ed78a1643676f043dde Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Thu, 12 Oct 2023 19:41:48 +0000 Subject: [PATCH 27/39] Disable telemetry for curl test that's intended for single requests --- test/test_curl.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/test_curl.cpp b/test/test_curl.cpp index 065a57fd..968f432e 100644 --- a/test/test_curl.cpp +++ b/test/test_curl.cpp @@ -144,6 +144,9 @@ TEST_CASE("parse response headers and body") { config.defaults.service = "testsvc"; config.logger = logger; config.agent.http_client = client; + // The http client is a mock that only expects a single request, so + // force only tracing to be sent and exclude telemetry. + config.report_telemetry = false; const auto finalized = finalize_config(config); REQUIRE(finalized); From 688906d34208a0e5326327c62f8c307bfadaa0e0 Mon Sep 17 00:00:00 2001 From: David Goffredo Date: Thu, 12 Oct 2023 17:55:50 -0400 Subject: [PATCH 28/39] Add mutex to MockLogger, seems to prevent SIGSEGV now that telemetry is making Curl requests.
--- test/mocks/loggers.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/mocks/loggers.h b/test/mocks/loggers.h index c41b4278..eeed55b2 100644 --- a/test/mocks/loggers.h +++ b/test/mocks/loggers.h @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -28,6 +29,7 @@ struct MockLogger : public Logger { std::variant payload; }; + mutable std::mutex mutex; std::ostream* echo = nullptr; std::vector entries; @@ -41,6 +43,7 @@ struct MockLogger : public Logger { MockLogger() = default; void log_error(const LogFunc& write) override { + std::lock_guard lock{mutex}; std::ostringstream stream; write(stream); if (echo) { @@ -50,6 +53,7 @@ struct MockLogger : public Logger { } void log_startup(const LogFunc& write) override { + std::lock_guard lock{mutex}; std::ostringstream stream; write(stream); if (echo && policy == ERRORS_AND_STARTUP) { @@ -59,6 +63,7 @@ struct MockLogger : public Logger { } void log_error(const Error& error) override { + std::lock_guard lock{mutex}; if (echo) { *echo << error << '\n'; } @@ -66,6 +71,7 @@ struct MockLogger : public Logger { } void log_error(StringView message) override { + std::lock_guard lock{mutex}; if (echo) { *echo << message << '\n'; } @@ -77,6 +83,7 @@ struct MockLogger : public Logger { int startup_count() const { return count(Entry::STARTUP); } int count(Entry::Kind kind) const { + std::lock_guard lock{mutex}; return std::count_if( entries.begin(), entries.end(), [kind](const Entry& entry) { return entry.kind == kind; }); @@ -84,6 +91,7 @@ struct MockLogger : public Logger { const Error& first_error() const { REQUIRE(error_count() > 0); + std::lock_guard lock{mutex}; auto found = std::find_if( entries.begin(), entries.end(), [](const Entry& entry) { return entry.kind == Entry::ERROR; }); @@ -92,6 +100,7 @@ struct MockLogger : public Logger { const std::string& first_startup() const { REQUIRE(startup_count() > 0); + std::lock_guard lock{mutex}; auto found = std::find_if( entries.begin(), 
entries.end(), [](const Entry& entry) { return entry.kind == Entry::STARTUP; }); From 1ad8f70bf46d74ac3dd2b6b355e25113b2928099 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Thu, 12 Oct 2023 22:43:46 +0000 Subject: [PATCH 29/39] Bump coverage percentage --- test/test_metrics.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_metrics.cpp b/test/test_metrics.cpp index d8354e73..4fc0f705 100644 --- a/test/test_metrics.cpp +++ b/test/test_metrics.cpp @@ -19,9 +19,9 @@ TEST_CASE("Counter metrics") { TEST_CASE("Gauge metrics") { GaugeMetric metric = {"test.gauge.metric", {"testing-testing:123"}, true}; - + metric.set(40); metric.inc(); - metric.add(50); + metric.add(10); metric.sub(8); metric.dec(); REQUIRE(metric.value() == 42); From c839b3a7fd525e297e0a33fafffcd5740b7b4941 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Thu, 12 Oct 2023 22:50:09 +0000 Subject: [PATCH 30/39] Only pass `this` when calling methods --- src/datadog/datadog_agent.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/datadog/datadog_agent.cpp b/src/datadog/datadog_agent.cpp index 20169df6..d14670d1 100644 --- a/src/datadog/datadog_agent.cpp +++ b/src/datadog/datadog_agent.cpp @@ -265,20 +265,20 @@ void DatadogAgent::flush() { // This is the callback for the HTTP response. It's invoked // asynchronously. 
- auto on_response = [this, samplers = std::move(response_handlers), + auto on_response = [telemetry = tracer_telemetry_, samplers = std::move(response_handlers), logger = logger_](int response_status, const DictReader& /*response_headers*/, std::string response_body) { if (response_status >= 500) { - tracer_telemetry_->metrics().trace_api.responses_5xx.inc(); + telemetry->metrics().trace_api.responses_5xx.inc(); } else if (response_status >= 400) { - tracer_telemetry_->metrics().trace_api.responses_4xx.inc(); + telemetry->metrics().trace_api.responses_4xx.inc(); } else if (response_status >= 300) { - tracer_telemetry_->metrics().trace_api.responses_3xx.inc(); + telemetry->metrics().trace_api.responses_3xx.inc(); } else if (response_status >= 200) { - tracer_telemetry_->metrics().trace_api.responses_2xx.inc(); + telemetry->metrics().trace_api.responses_2xx.inc(); } else if (response_status >= 100) { - tracer_telemetry_->metrics().trace_api.responses_1xx.inc(); + telemetry->metrics().trace_api.responses_1xx.inc(); } if (response_status != 200) { logger->log_error([&](auto& stream) { @@ -315,8 +315,8 @@ void DatadogAgent::flush() { // This is the callback for if something goes wrong sending the // request or retrieving the response. It's invoked // asynchronously. 
- auto on_error = [this, logger = logger_](Error error) { - tracer_telemetry_->metrics().trace_api.errors_network.inc(); + auto on_error = [telemetry = tracer_telemetry_, logger = logger_](Error error) { + telemetry->metrics().trace_api.errors_network.inc(); logger->log_error(error.with_prefix( "Error occurred during HTTP request for submitting traces: ")); }; From 95c5d0b487f9a0f7efc7ac13d92a0de6f0dfec6c Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Thu, 12 Oct 2023 23:09:08 +0000 Subject: [PATCH 31/39] Log errors with a prefix --- src/datadog/datadog_agent.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/datadog/datadog_agent.cpp b/src/datadog/datadog_agent.cpp index d14670d1..c4ee59b9 100644 --- a/src/datadog/datadog_agent.cpp +++ b/src/datadog/datadog_agent.cpp @@ -265,7 +265,8 @@ void DatadogAgent::flush() { // This is the callback for the HTTP response. It's invoked // asynchronously. - auto on_response = [telemetry = tracer_telemetry_, samplers = std::move(response_handlers), + auto on_response = [telemetry = tracer_telemetry_, + samplers = std::move(response_handlers), logger = logger_](int response_status, const DictReader& /*response_headers*/, std::string response_body) { @@ -315,7 +316,8 @@ void DatadogAgent::flush() { // This is the callback for if something goes wrong sending the // request or retrieving the response. It's invoked // asynchronously. 
- auto on_error = [telemetry = tracer_telemetry_, logger = logger_](Error error) { + auto on_error = [telemetry = tracer_telemetry_, + logger = logger_](Error error) { telemetry->metrics().trace_api.errors_network.inc(); logger->log_error(error.with_prefix( "Error occurred during HTTP request for submitting traces: ")); @@ -326,7 +328,8 @@ void DatadogAgent::flush() { traces_endpoint_, std::move(set_request_headers), std::move(body), std::move(on_response), std::move(on_error)); if (auto* error = post_result.if_error()) { - logger_->log_error(*error); + logger_->log_error( + error->with_prefix("Unexpected error submitting traces: ")); } } @@ -336,7 +339,8 @@ void DatadogAgent::send_app_started(nlohmann::json&& tracer_config) { telemetry_endpoint_, telemetry_set_request_headers_, std::move(payload), telemetry_on_response_, telemetry_on_error_); if (auto* error = post_result.if_error()) { - logger_->log_error(*error); + logger_->log_error( + error->with_prefix("Unexpected error submitting telemetry: ")); } } @@ -346,7 +350,8 @@ void DatadogAgent::send_heartbeat_and_telemetry() { telemetry_endpoint_, telemetry_set_request_headers_, std::move(payload), telemetry_on_response_, telemetry_on_error_); if (auto* error = post_result.if_error()) { - logger_->log_error(*error); + logger_->log_error( + error->with_prefix("Unexpected error submitting traces: ")); } } @@ -356,7 +361,8 @@ void DatadogAgent::send_app_closing() { telemetry_endpoint_, telemetry_set_request_headers_, std::move(payload), telemetry_on_response_, telemetry_on_error_); if (auto* error = post_result.if_error()) { - logger_->log_error(*error); + logger_->log_error( + error->with_prefix("Unexpected error submitting traces: ")); } } From 5ec176907f6516b903811dd90ef46c378e1dc306 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Thu, 12 Oct 2023 23:12:30 +0000 Subject: [PATCH 32/39] Unconstification --- src/datadog/metrics.cpp | 18 +++++++++--------- src/datadog/metrics.h | 22 ++++++++++------------ 2 files 
changed, 19 insertions(+), 21 deletions(-) diff --git a/src/datadog/metrics.cpp b/src/datadog/metrics.cpp index 44468c17..19eaa47e 100644 --- a/src/datadog/metrics.cpp +++ b/src/datadog/metrics.cpp @@ -5,24 +5,24 @@ namespace datadog { namespace tracing { -Metric::Metric(const std::string name, std::string type, - const std::vector tags, bool common) +Metric::Metric(std::string name, std::string type, + std::vector tags, bool common) : name_(name), type_(type), tags_(tags), common_(common) {} -const std::string Metric::name() { return name_; } -const std::string Metric::type() { return type_; } -const std::vector Metric::tags() { return tags_; } +std::string Metric::name() { return name_; } +std::string Metric::type() { return type_; } +std::vector Metric::tags() { return tags_; } bool Metric::common() { return common_; } uint64_t Metric::value() { return value_; } uint64_t Metric::capture_and_reset_value() { return value_.exchange(0); } -CounterMetric::CounterMetric(const std::string name, - const std::vector tags, bool common) +CounterMetric::CounterMetric(std::string name, std::vector tags, + bool common) : Metric(name, "count", tags, common) {} void CounterMetric::inc() { add(1); } void CounterMetric::add(uint64_t amount) { value_ += amount; } -GaugeMetric::GaugeMetric(const std::string name, - const std::vector tags, bool common) +GaugeMetric::GaugeMetric(std::string name, std::vector tags, + bool common) : Metric(name, "gauge", tags, common) {} void GaugeMetric::set(uint64_t value) { value_ = value; } void GaugeMetric::inc() { add(1); } diff --git a/src/datadog/metrics.h b/src/datadog/metrics.h index ad994fde..0455812e 100644 --- a/src/datadog/metrics.h +++ b/src/datadog/metrics.h @@ -17,11 +17,11 @@ class Metric { // The name of the metric that will be published. A transformation occurs // based on the name and whether it is "common" or "language-specific" when it // is recorded. - const std::string name_; + std::string name_; // The type of the metric. 
This will currently be count or gauge. - const std::string type_; + std::string type_; // Tags associated with this specific instance of the metric. - const std::vector tags_; + std::vector tags_; // This affects the transformation of the metric name, where it can be a // common telemetry metric, or a language-specific metric that is prefixed // with the language name. @@ -29,15 +29,15 @@ class Metric { protected: std::atomic value_ = 0; - Metric(const std::string name, std::string type, - const std::vector tags, bool common); + Metric(std::string name, std::string type, std::vector tags, + bool common); public: // Accessors for name, type, tags, common and capture_and_reset_value are used // when producing the JSON message for reporting metrics. - const std::string name(); - const std::string type(); - const std::vector tags(); + std::string name(); + std::string type(); + std::vector tags(); bool common(); uint64_t value(); uint64_t capture_and_reset_value(); @@ -47,8 +47,7 @@ class Metric { // number of actions, or incrementing the current number of actions by 1. class CounterMetric : public Metric { public: - CounterMetric(const std::string name, const std::vector tags, - bool common); + CounterMetric(std::string name, std::vector tags, bool common); void inc(); void add(uint64_t amount); }; @@ -58,8 +57,7 @@ class CounterMetric : public Metric { // state by 1. 
class GaugeMetric : public Metric { public: - GaugeMetric(const std::string name, const std::vector tags, - bool common); + GaugeMetric(std::string name, std::vector tags, bool common); void set(uint64_t value); void inc(); void add(uint64_t amount); From a92cdf324d0f465c6957273df199d8899a034688 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Thu, 12 Oct 2023 23:14:35 +0000 Subject: [PATCH 33/39] Reordering things --- src/datadog/tracer_telemetry.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/datadog/tracer_telemetry.h b/src/datadog/tracer_telemetry.h index c1264535..64dcf67e 100644 --- a/src/datadog/tracer_telemetry.h +++ b/src/datadog/tracer_telemetry.h @@ -47,14 +47,6 @@ class TracerTelemetry { RuntimeID runtime_id_; std::string hostname_; uint64_t seq_id_ = 0; - // Each metric has an associated MetricSnapshot that contains the data points, - // represented as a timestamp and the value of that metric. - using MetricSnapshot = std::vector>; - // This uses a reference_wrapper so references to internal metric values can - // be captured, and be iterated trivially when the values need to be - // snapshotted and published in telemetry messages. - std::vector, MetricSnapshot>> - metrics_snapshots_; // This structure contains all the metrics that are exposed by tracer // telemetry. struct { @@ -94,6 +86,14 @@ class TracerTelemetry { } trace_api; } metrics_; + // Each metric has an associated MetricSnapshot that contains the data points, + // represented as a timestamp and the value of that metric. + using MetricSnapshot = std::vector>; + // This uses a reference_wrapper so references to internal metric values can + // be captured, and be iterated trivially when the values need to be + // snapshotted and published in telemetry messages. 
+ std::vector, MetricSnapshot>> + metrics_snapshots_; nlohmann::json generate_telemetry_body(std::string request_type); From d9c2288f6266b36928ed8b93714814b76612cc5e Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Thu, 12 Oct 2023 23:17:07 +0000 Subject: [PATCH 34/39] std::time_t --- src/datadog/tracer_telemetry.cpp | 12 ++++++------ src/datadog/tracer_telemetry.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/datadog/tracer_telemetry.cpp b/src/datadog/tracer_telemetry.cpp index 21c79f1a..a0c3233e 100644 --- a/src/datadog/tracer_telemetry.cpp +++ b/src/datadog/tracer_telemetry.cpp @@ -55,9 +55,9 @@ TracerTelemetry::TracerTelemetry( nlohmann::json TracerTelemetry::generate_telemetry_body( std::string request_type) { - time_t tracer_time = std::chrono::duration_cast( - clock_().wall.time_since_epoch()) - .count(); + std::time_t tracer_time = std::chrono::duration_cast( + clock_().wall.time_since_epoch()) + .count(); seq_id_++; return nlohmann::json::object({ {"api_version", "v2"}, @@ -99,9 +99,9 @@ std::string TracerTelemetry::app_started(nlohmann::json&& tracer_config) { } void TracerTelemetry::capture_metrics() { - time_t timepoint = std::chrono::duration_cast( - clock_().wall.time_since_epoch()) - .count(); + std::time_t timepoint = std::chrono::duration_cast( + clock_().wall.time_since_epoch()) + .count(); for (auto& m : metrics_snapshots_) { auto value = m.first.get().capture_and_reset_value(); if (value == 0) { diff --git a/src/datadog/tracer_telemetry.h b/src/datadog/tracer_telemetry.h index 64dcf67e..3fed8fb8 100644 --- a/src/datadog/tracer_telemetry.h +++ b/src/datadog/tracer_telemetry.h @@ -88,7 +88,7 @@ class TracerTelemetry { } metrics_; // Each metric has an associated MetricSnapshot that contains the data points, // represented as a timestamp and the value of that metric. 
+ using MetricSnapshot = std::vector>; // This uses a reference_wrapper so references to internal metric values can // be captured, and be iterated trivially when the values need to be // snapshotted and published in telemetry messages. From 6e9e28d21a36e2f4e7bbdbdd6128c0513929cee1 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Thu, 12 Oct 2023 23:22:41 +0000 Subject: [PATCH 35/39] Change datadog_agent test --- test/mocks/http_clients.h | 5 +---- test/test_datadog_agent.cpp | 4 ++++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/test/mocks/http_clients.h b/test/mocks/http_clients.h index c30c0cb6..1124861c 100644 --- a/test/mocks/http_clients.h +++ b/test/mocks/http_clients.h @@ -39,12 +39,9 @@ struct MockHTTPClient : public HTTPClient { ResponseHandler on_response_; ErrorHandler on_error_; - Expected post(const URL& url, HeadersSetter set_headers, + Expected post(const URL& /* url */, HeadersSetter set_headers, std::string /*body*/, ResponseHandler on_response, ErrorHandler on_error) override { - if (url.path != "/v0.4/traces") { - return {}; - } std::lock_guard lock{mutex_}; if (!post_error) { on_response_ = on_response; diff --git a/test/test_datadog_agent.cpp b/test/test_datadog_agent.cpp index 12b371d6..02590bf0 100644 --- a/test/test_datadog_agent.cpp +++ b/test/test_datadog_agent.cpp @@ -22,6 +22,10 @@ TEST_CASE("CollectorResponse") { config.logger = logger; config.agent.event_scheduler = event_scheduler; config.agent.http_client = http_client; + // Tests currently only cover sending traces to the agent. + // Submitting telemetry performs essentially the same steps, but may be added + // in the future. + config.report_telemetry = false; auto finalized = finalize_config(config); REQUIRE(finalized); From bdb9d9f74e6f963e13e58f8967c1e9b1c8993cdb Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Fri, 13 Oct 2023 05:02:42 +0000 Subject: [PATCH 36/39] Only send interval for gauge metrics.
--- src/datadog/tracer_telemetry.cpp | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/datadog/tracer_telemetry.cpp b/src/datadog/tracer_telemetry.cpp index a0c3233e..8d1dba45 100644 --- a/src/datadog/tracer_telemetry.cpp +++ b/src/datadog/tracer_telemetry.cpp @@ -124,14 +124,26 @@ std::string TracerTelemetry::heartbeat_and_telemetry() { auto& metric = m.first.get(); auto& points = m.second; if (!points.empty()) { - metrics.emplace_back(nlohmann::json::object({ - {"metric", metric.name()}, - {"tags", metric.tags()}, - {"type", metric.type()}, - {"interval", 60}, - {"points", points}, - {"common", metric.common()}, - })); + auto type = metric.type(); + if (type == "count") { + metrics.emplace_back(nlohmann::json::object({ + {"metric", metric.name()}, + {"tags", metric.tags()}, + {"type", metric.type()}, + {"points", points}, + {"common", metric.common()}, + })); + } else if (type == "gauge") { + // gauge metrics have an interval + metrics.emplace_back(nlohmann::json::object({ + {"metric", metric.name()}, + {"tags", metric.tags()}, + {"type", metric.type()}, + {"interval", 10}, + {"points", points}, + {"common", metric.common()}, + })); + } } points.clear(); } From 28dd1c44023f6f22aaa63f59420c1d83ae09e725 Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Mon, 16 Oct 2023 23:46:25 +0000 Subject: [PATCH 37/39] Fix logged error message --- src/datadog/datadog_agent.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/datadog/datadog_agent.cpp b/src/datadog/datadog_agent.cpp index c4ee59b9..481aa0b3 100644 --- a/src/datadog/datadog_agent.cpp +++ b/src/datadog/datadog_agent.cpp @@ -340,7 +340,7 @@ void DatadogAgent::send_app_started(nlohmann::json&& tracer_config) { telemetry_on_response_, telemetry_on_error_); if (auto* error = post_result.if_error()) { logger_->log_error( - error->with_prefix("Unexpected error submitting telemetry: ")); + error->with_prefix("Unexpected error submitting
telemetry app-started event: ")); } } @@ -351,7 +351,7 @@ void DatadogAgent::send_heartbeat_and_telemetry() { telemetry_on_response_, telemetry_on_error_); if (auto* error = post_result.if_error()) { logger_->log_error( - error->with_prefix("Unexpected error submitting traces: ")); + error->with_prefix("Unexpected error submitting telemetry app-heartbeat event: ")); } } @@ -362,7 +362,7 @@ void DatadogAgent::send_app_closing() { telemetry_on_response_, telemetry_on_error_); if (auto* error = post_result.if_error()) { logger_->log_error( - error->with_prefix("Unexpected error submitting traces: ")); + error->with_prefix("Unexpected error submitting telemetry app-closing event: ")); } } From eb04fe8b3b7b08c3933d8d6f617f534c903a8d0a Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Tue, 17 Oct 2023 00:03:05 +0000 Subject: [PATCH 38/39] Fix the app-closing metrics payload --- src/datadog/tracer_telemetry.cpp | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/datadog/tracer_telemetry.cpp b/src/datadog/tracer_telemetry.cpp index 8d1dba45..1d302fbb 100644 --- a/src/datadog/tracer_telemetry.cpp +++ b/src/datadog/tracer_telemetry.cpp @@ -178,14 +178,26 @@ std::string TracerTelemetry::app_closing() { auto& metric = m.first.get(); auto& points = m.second; if (!points.empty()) { - metrics.emplace_back(nlohmann::json::object({ - {"metric", metric.name()}, - {"tags", metric.tags()}, - {"type", metric.type()}, - {"interval", 60}, - {"points", points}, - {"common", metric.common()}, - })); + auto type = metric.type(); + if (type == "count") { + metrics.emplace_back(nlohmann::json::object({ + {"metric", metric.name()}, + {"tags", metric.tags()}, + {"type", metric.type()}, + {"points", points}, + {"common", metric.common()}, + })); + } else if (type == "gauge") { + // gauge metrics have an interval + metrics.emplace_back(nlohmann::json::object({ + {"metric", metric.name()}, + {"tags", metric.tags()}, + {"type", metric.type()}, +
{"interval", 10}, + {"points", points}, + {"common", metric.common()}, + })); + } } points.clear(); } From c57a039bb2077eb4d341736473200a08402ac51b Mon Sep 17 00:00:00 2001 From: Caleb Gilmour Date: Tue, 17 Oct 2023 00:15:58 +0000 Subject: [PATCH 39/39] Formatting. --- src/datadog/datadog_agent.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/datadog/datadog_agent.cpp b/src/datadog/datadog_agent.cpp index 481aa0b3..dcb7dc32 100644 --- a/src/datadog/datadog_agent.cpp +++ b/src/datadog/datadog_agent.cpp @@ -339,8 +339,8 @@ void DatadogAgent::send_app_started(nlohmann::json&& tracer_config) { telemetry_endpoint_, telemetry_set_request_headers_, std::move(payload), telemetry_on_response_, telemetry_on_error_); if (auto* error = post_result.if_error()) { - logger_->log_error( - error->with_prefix("Unexpected error submitting telemetry app-started event: ")); + logger_->log_error(error->with_prefix( + "Unexpected error submitting telemetry app-started event: ")); } } @@ -350,8 +350,8 @@ void DatadogAgent::send_heartbeat_and_telemetry() { telemetry_endpoint_, telemetry_set_request_headers_, std::move(payload), telemetry_on_response_, telemetry_on_error_); if (auto* error = post_result.if_error()) { - logger_->log_error( - error->with_prefix("Unexpected error submitting telemetry app-heartbeat event: ")); + logger_->log_error(error->with_prefix( + "Unexpected error submitting telemetry app-heartbeat event: ")); } } @@ -361,8 +361,8 @@ void DatadogAgent::send_app_closing() { telemetry_endpoint_, telemetry_set_request_headers_, std::move(payload), telemetry_on_response_, telemetry_on_error_); if (auto* error = post_result.if_error()) { - logger_->log_error( - error->with_prefix("Unexpected error submitting telemetry app-closing event: ")); + logger_->log_error(error->with_prefix( + "Unexpected error submitting telemetry app-closing event: ")); } }