From 21263b1b50e2125abb111a0043d951c8fe80f1a9 Mon Sep 17 00:00:00 2001 From: Apisit Ritruengroj <38898766+apskhem@users.noreply.github.com> Date: Thu, 16 Jan 2025 12:57:25 +0700 Subject: [PATCH 1/2] feat(cat-gateway): Add memory metrics to improve observability (#1499) * refactor(cat-gateway): Restructure cat-gateway metrics module (#1483) * feat: initial files * refactor: new structure * feat: initial crate * fix: mem deps * feat: memory tracker * feat: error handling * feat: register module * fix: region * chore: fmtfix * chore: lintfix * feat: is initialized * chore: cspell * refactor: rw * feat: memory metrics envar * fix: cspell * chore: fmtfix * feat: metrics * chore: final cleanup --- catalyst-gateway/bin/Cargo.toml | 2 + catalyst-gateway/bin/src/metrics/memory.rs | 166 ++++++++++++++++++ catalyst-gateway/bin/src/metrics/mod.rs | 2 + catalyst-gateway/bin/src/settings/mod.rs | 34 ++-- .../bin/src/settings/str_env_var.rs | 38 ++++ 5 files changed, 227 insertions(+), 15 deletions(-) diff --git a/catalyst-gateway/bin/Cargo.toml b/catalyst-gateway/bin/Cargo.toml index f55833967ad..565f0ed396c 100644 --- a/catalyst-gateway/bin/Cargo.toml +++ b/catalyst-gateway/bin/Cargo.toml @@ -98,6 +98,8 @@ regex = "1.11.1" minijinja = "2.5.0" bytes = "1.9.0" mime = "0.3.17" +stats_alloc = "0.1.10" +memory-stats = "1.0.0" [dev-dependencies] proptest = "1.5.0" diff --git a/catalyst-gateway/bin/src/metrics/memory.rs b/catalyst-gateway/bin/src/metrics/memory.rs index 7da8bd333e9..13874d98744 100644 --- a/catalyst-gateway/bin/src/metrics/memory.rs +++ b/catalyst-gateway/bin/src/metrics/memory.rs @@ -1 +1,167 @@ //! Metrics related to memory analytics. + +use std::{ + alloc::System, + sync::atomic::{AtomicBool, Ordering}, + thread, +}; + +use memory_stats::{memory_stats, MemoryStats}; +use stats_alloc::{Region, StatsAlloc, INSTRUMENTED_SYSTEM}; + +use crate::settings::Settings; + +/// Use the instrumented allocator for gathering allocation statistics. +/// Note: This wraps the global allocator. +/// All structs that use the global allocator can be tracked. +#[global_allocator] +static GLOBAL: &StatsAlloc = &INSTRUMENTED_SYSTEM; + +/// This is to prevent the init function from accidentally being called multiple times. +static IS_INITIALIZED: AtomicBool = AtomicBool::new(false); + +/// Starts a background thread to periodically update memory metrics. +/// +/// This function spawns a thread that updates the global `MemoryMetrics` +/// structure at regular intervals defined by `UPDATE_INTERVAL_MILLI`. +pub(crate) fn init_metrics_updater() { + if IS_INITIALIZED.swap(true, Ordering::SeqCst) { + return; + } + + let stats = Region::new(GLOBAL); + let api_host_names = Settings::api_host_names().join(","); + let service_id = Settings::service_id(); + + thread::spawn(move || { + loop { + { + let allocator_stats = stats.change(); + let mem_stats = memory_stats().unwrap_or({ + MemoryStats { + physical_mem: 0, + virtual_mem: 0, + } + }); + + reporter::MEMORY_PHYSICAL_USAGE + .with_label_values(&[&api_host_names, service_id]) + .set(i64::try_from(mem_stats.physical_mem).unwrap_or(-1)); + reporter::MEMORY_VIRTUAL_USAGE + .with_label_values(&[&api_host_names, service_id]) + .set(i64::try_from(mem_stats.virtual_mem).unwrap_or(-1)); + reporter::MEMORY_ALLOCATION_COUNT + .with_label_values(&[&api_host_names, service_id]) + .set(i64::try_from(allocator_stats.allocations).unwrap_or(-1)); + reporter::MEMORY_DEALLOCATION_COUNT + .with_label_values(&[&api_host_names, service_id]) + .set(i64::try_from(allocator_stats.deallocations).unwrap_or(-1)); + reporter::MEMORY_REALLOCATION_COUNT + .with_label_values(&[&api_host_names, service_id]) + .set(i64::try_from(allocator_stats.reallocations).unwrap_or(-1)); + reporter::MEMORY_BYTES_ALLOCATED + .with_label_values(&[&api_host_names, service_id]) + .set(i64::try_from(allocator_stats.bytes_allocated).unwrap_or(-1)); + reporter::MEMORY_BYTES_DEALLOCATED + .with_label_values(&[&api_host_names, service_id]) + .set(i64::try_from(allocator_stats.bytes_deallocated).unwrap_or(-1)); + reporter::MEMORY_BYTES_REALLOCATED + .with_label_values(&[&api_host_names, service_id]) + .set(i64::try_from(allocator_stats.bytes_reallocated).unwrap_or(-1)); + } + + thread::sleep(Settings::metrics_memory_interval()); + } + }); +} + +/// All the related memory reporting metrics to the Prometheus service are inside this +/// module. +mod reporter { + use std::sync::LazyLock; + + use prometheus::{register_int_gauge_vec, IntGaugeVec}; + + /// Labels for the client metrics + const MEMORY_METRIC_LABELS: [&str; 2] = ["api_host_names", "service_id"]; + + /// The "physical" memory used by this process, in bytes. + pub(super) static MEMORY_PHYSICAL_USAGE: LazyLock = LazyLock::new(|| { + register_int_gauge_vec!( + "memory_physical_usage", + "Amount of physical memory usage in bytes", + &MEMORY_METRIC_LABELS + ) + .unwrap() + }); + + /// The "virtual" memory used by this process, in bytes. + pub(super) static MEMORY_VIRTUAL_USAGE: LazyLock = LazyLock::new(|| { + register_int_gauge_vec!( + "memory_virtual_usage", + "Amount of physical virtual usage in bytes", + &MEMORY_METRIC_LABELS + ) + .unwrap() + }); + + /// The number of allocation count in the heap. + pub(super) static MEMORY_ALLOCATION_COUNT: LazyLock = LazyLock::new(|| { + register_int_gauge_vec!( + "memory_allocation_count", + "Number of allocation count in the heap", + &MEMORY_METRIC_LABELS + ) + .unwrap() + }); + + /// The number of deallocation count in the heap. + pub(super) static MEMORY_DEALLOCATION_COUNT: LazyLock = LazyLock::new(|| { + register_int_gauge_vec!( + "memory_deallocation_count", + "Number of deallocation count in the heap", + &MEMORY_METRIC_LABELS + ) + .unwrap() + }); + + /// The number of reallocation count in the heap. + pub(super) static MEMORY_REALLOCATION_COUNT: LazyLock = LazyLock::new(|| { + register_int_gauge_vec!( + "memory_reallocation_count", + "Number of reallocation count in the heap", + &MEMORY_METRIC_LABELS + ) + .unwrap() + }); + + /// The amount of accumulative allocated bytes in the heap. + pub(super) static MEMORY_BYTES_ALLOCATED: LazyLock = LazyLock::new(|| { + register_int_gauge_vec!( + "memory_bytes_allocated", + "Amount of accumulative allocated bytes in the heap", + &MEMORY_METRIC_LABELS + ) + .unwrap() + }); + + /// The amount of accumulative deallocated bytes in the heap. + pub(super) static MEMORY_BYTES_DEALLOCATED: LazyLock = LazyLock::new(|| { + register_int_gauge_vec!( + "memory_bytes_deallocated", + "Amount of accumulative deallocated bytes in the heap", + &MEMORY_METRIC_LABELS + ) + .unwrap() + }); + + /// The amount of accumulative reallocated bytes in the heap. + pub(super) static MEMORY_BYTES_REALLOCATED: LazyLock = LazyLock::new(|| { + register_int_gauge_vec!( + "memory_bytes_reallocated", + "Amount of accumulative reallocated bytes in the heap", + &MEMORY_METRIC_LABELS + ) + .unwrap() + }); +} diff --git a/catalyst-gateway/bin/src/metrics/mod.rs b/catalyst-gateway/bin/src/metrics/mod.rs index ba8d71503da..74725efee27 100644 --- a/catalyst-gateway/bin/src/metrics/mod.rs +++ b/catalyst-gateway/bin/src/metrics/mod.rs @@ -14,5 +14,7 @@ pub(crate) mod memory; /// Returns the default prometheus registry. #[must_use] pub(crate) fn init_prometheus() -> Registry { + memory::init_metrics_updater(); + default_registry().clone() } diff --git a/catalyst-gateway/bin/src/settings/mod.rs b/catalyst-gateway/bin/src/settings/mod.rs index 6be03945050..d3702d39373 100644 --- a/catalyst-gateway/bin/src/settings/mod.rs +++ b/catalyst-gateway/bin/src/settings/mod.rs @@ -11,7 +11,6 @@ use anyhow::anyhow; use cardano_chain_follower::Network; use clap::Args; use dotenvy::dotenv; -use duration_string::DurationString; use str_env_var::StringEnvVar; use tracing::error; use url::Url; @@ -52,6 +51,9 @@ const API_URL_PREFIX_DEFAULT: &str = "/api"; /// Default `CHECK_CONFIG_TICK` used in development. const CHECK_CONFIG_TICK_DEFAULT: &str = "5s"; +/// Default `METRICS_MEMORY_INTERVAL`. +const METRICS_MEMORY_INTERVAL_DEFAULT: &str = "1s"; + /// Default Event DB URL. const EVENT_DB_URL_DEFAULT: &str = "postgresql://postgres:postgres@localhost/catalyst_events?sslmode=disable"; @@ -144,6 +146,9 @@ struct EnvVars { /// Tick every N seconds until config exists in db #[allow(unused)] check_config_tick: Duration, + + /// Interval for updating and sending memory metrics. + metrics_memory_interval: Duration, } // Lazy initialization of all env vars which are not command line parameters. @@ -157,19 +162,6 @@ static ENV_VARS: LazyLock = LazyLock::new(|| { // Support env vars in a `.env` file, doesn't need to exist. dotenv().ok(); - let check_interval = StringEnvVar::new("CHECK_CONFIG_TICK", CHECK_CONFIG_TICK_DEFAULT.into()); - let check_config_tick = match DurationString::try_from(check_interval.as_string()) { - Ok(duration) => duration.into(), - Err(error) => { - error!( - "Invalid Check Config Tick Duration: {} : {}. Defaulting to 5 seconds.", - check_interval.as_str(), - error - ); - Duration::from_secs(5) - }, - }; - EnvVars { github_repo_owner: StringEnvVar::new("GITHUB_REPO_OWNER", GITHUB_REPO_OWNER_DEFAULT.into()), github_repo_name: StringEnvVar::new("GITHUB_REPO_NAME", GITHUB_REPO_NAME_DEFAULT.into()), @@ -194,7 +186,14 @@ static ENV_VARS: LazyLock = LazyLock::new(|| { ), chain_follower: chain_follower::EnvVars::new(), internal_api_key: StringEnvVar::new_optional("INTERNAL_API_KEY", true), - check_config_tick, + check_config_tick: StringEnvVar::new_as_duration( + "CHECK_CONFIG_TICK", + CHECK_CONFIG_TICK_DEFAULT, + ), + metrics_memory_interval: StringEnvVar::new_as_duration( + "METRICS_MEMORY_INTERVAL", + METRICS_MEMORY_INTERVAL_DEFAULT, + ), } }); @@ -288,6 +287,11 @@ impl Settings { ENV_VARS.service_id.as_str() } + /// The memory metrics interval + pub(crate) fn metrics_memory_interval() -> Duration { + ENV_VARS.metrics_memory_interval + } + /// Get a list of all host names to serve the API on. /// /// Used by the `OpenAPI` Documentation to point to the correct backend. diff --git a/catalyst-gateway/bin/src/settings/str_env_var.rs b/catalyst-gateway/bin/src/settings/str_env_var.rs index 18e8a6414b7..4676e29eb93 100644 --- a/catalyst-gateway/bin/src/settings/str_env_var.rs +++ b/catalyst-gateway/bin/src/settings/str_env_var.rs @@ -1,10 +1,15 @@ //! Processing for String Environment Variables + +// cspell: words smhdwy + use std::{ env::{self, VarError}, fmt::{self, Display}, str::FromStr, + time::Duration, }; +use duration_string::DurationString; use strum::VariantNames; use tracing::{error, info}; @@ -168,6 +173,39 @@ impl StringEnvVar { value } + /// Convert an Envvar into the required Duration type. + pub(crate) fn new_as_duration(var_name: &str, default: &str) -> Duration { + let choices = "A value in the format of `[0-9]+(ns|us|ms|[smhdwy])`"; + + let raw_value = StringEnvVar::new( + var_name, + (default.to_string().as_str(), false, choices).into(), + ) + .as_string(); + + match DurationString::try_from(raw_value.clone()) { + Ok(duration) => duration.into(), + Err(error) => { + error!( + "Invalid Duration: {} : {}. Defaulting to {}.", + raw_value, error, default + ); + + match DurationString::try_from(default.to_string()) { + Ok(duration) => duration.into(), + // The error from parsing the default value must not happen + Err(error) => { + error!( + "Invalid Default Duration: {} : {}. Defaulting to 1s.", + default, error + ); + Duration::from_secs(1) + }, + } + }, + } + } + /// Convert an Envvar into an integer in the bounded range. pub(super) fn new_as(var_name: &str, default: T, min: T, max: T) -> T where From 56454b6468e42b37c53b5f509ee410b202085136 Mon Sep 17 00:00:00 2001 From: Stefano Cunego <93382903+kukkok3@users.noreply.github.com> Date: Thu, 16 Jan 2025 07:17:09 +0100 Subject: [PATCH 2/2] test(cat-gateway): adds nightly schemathesis (#1504) * wip * wip * wip * wip * wip * wip * fix: remove scylla earthfile * feat: runs schemathesis only on api/started endpoint * clean up * fix: blueprint * fix: blueprint * fix: blueprint * fix: blueprint * fix: remove hooks * fix: remove some checks * feat: adds seed param to chemathesis package * fix: poetry * fix: earthly dockerd bug * fix: remove checks all param * fix: remove duplicated function * fix: regex * adds all check * chore: clean up * clean up * chore: update schemathesis version * fix: update schemathesis option to latest version * fixes * fix: st target failing statment * feat: adds schemathesis test target * test run * test run * remove test values --------- Co-authored-by: Steven Johnson Co-authored-by: Oleksandr Prokhorenko --- .../tests/schemathesis_tests/Earthfile | 74 +++++++++++++++++-- .../tests/schemathesis_tests/blueprint.cue | 1 + 2 files changed, 68 insertions(+), 7 deletions(-) diff --git a/catalyst-gateway/tests/schemathesis_tests/Earthfile b/catalyst-gateway/tests/schemathesis_tests/Earthfile index ac66d900f4f..f5cdb0f9a4c 100644 --- a/catalyst-gateway/tests/schemathesis_tests/Earthfile +++ b/catalyst-gateway/tests/schemathesis_tests/Earthfile @@ -3,9 +3,8 @@ VERSION 0.8 package-schemathesis: FROM python:3.12-alpine3.20 # TODO: https://github.com/input-output-hk/catalyst-voices/issues/465 - ARG openapi_spec - # optional argument that can be used to pass a --hypothesis-seed to replicate specific test failures - ARG seed + ARG api_spec + ARG seed # optional argument that can be used to pass a --hypothesis-seed to replicate specific test failures ARG version=3.39.5 RUN apk add --no-cache gcc musl-dev @@ -13,9 +12,10 @@ package-schemathesis: RUN mkdir /results COPY ./hooks/hooks.py . VOLUME /results - ENTRYPOINT st run --include-path-regex '^/api/v1/health/' \ + + ENTRYPOINT st run --exclude-path-regex 'draft' \ --exclude-path '/api/v1/health/inspection' \ #excluding since this is a internal debug endpoint - $openapi_spec \ + $api_spec \ --workers=2 \ --wait-for-schema=500 \ --max-response-time=5000 \ @@ -37,12 +37,12 @@ test-fuzzer-api: FROM earthly/dind:alpine-3.19-docker-25.0.5-r0 RUN apk update && apk add iptables-legacy curl # workaround for https://github.com/earthly/earthly/issues/3784 COPY schemathesis-docker-compose.yml . - LET OPENAPI_SPEC="http://0.0.0.0:3030/docs/cat-gateway.json" + LET api_spec="http://0.0.0.0:3030/docs/cat-gateway.json" ARG seed WITH DOCKER \ --compose schemathesis-docker-compose.yml \ - --load schemathesis:latest=(+package-schemathesis --openapi_spec=$OPENAPI_SPEC --seed=$seed) \ + --load schemathesis:latest=(+package-schemathesis --api_spec=$api_spec --seed=$seed) \ --load event-db:latest=(../../event-db+build) \ --load cat-gateway:latest=(../+package-cat-gateway-integration) \ --service event-db \ @@ -60,4 +60,64 @@ test-fuzzer-api: IF [ -f fail ] RUN --no-cache echo "Schemathesis test failed. Check the logs for more details" && \ exit 1 + END + +nightly-package-schemathesis: + FROM python:3.12-alpine3.20 + # TODO: https://github.com/input-output-hk/catalyst-voices/issues/465 + ARG api_spec + # optional argument that can be used to pass a --hypothesis-seed to replicate specific test failures + ARG seed + ARG version=3.39.5 + + RUN apk add --no-cache gcc musl-dev + RUN python -m pip install schemathesis==$version + RUN mkdir /results + COPY ./hooks/hooks.py . + VOLUME /results + ENTRYPOINT st run --checks=all $api_spec \ + --workers=2 \ + --wait-for-schema=120 \ + --max-response-time=300 \ + --hypothesis-max-examples=1000 \ + --data-generation-method=all \ + --exclude-deprecated \ + --force-schema-version=30 \ + --show-trace \ + --force-color \ + --junit-xml=/results/junit-report.xml \ + --cassette-path=/results/cassette.yaml \ + $seed + + ARG tag="latest" + SAVE IMAGE schemathesis:$tag + +# nightly-test-fuzzer-api - Fuzzy test cat-gateway using openapi specs. +nightly-test-fuzzer-api: + FROM earthly/dind:alpine-3.19-docker-25.0.5-r0 + RUN apk update && apk add iptables-legacy curl # workaround for https://github.com/earthly/earthly/issues/3784 + COPY schemathesis-docker-compose.yml . + LET api_spec="http://0.0.0.0:3030/docs/cat-gateway.json" + ARG seed + + WITH DOCKER \ + --compose schemathesis-docker-compose.yml \ + --load schemathesis:latest=(+nightly-package-schemathesis --api_spec=$api_spec --seed=$seed) \ + --load event-db:latest=(../../event-db+build) \ + --load cat-gateway:latest=(../+package-cat-gateway-integration) \ + --service event-db \ + --service cat-gateway \ + --allow-privileged + + RUN --no-cache docker run --net=host --name=st schemathesis:latest || echo fail > fail; \ + docker cp st:/results/junit-report.xml junit-report.xml && \ + docker cp st:/results/cassette.yaml cassette.yaml + END + WAIT + SAVE ARTIFACT junit-report.xml AS LOCAL schemathesis-nightly.junit-report.xml + SAVE ARTIFACT cassette.yaml AS LOCAL cassette.yaml + END + IF [ -f fail ] + RUN --no-cache echo "Nightly schemathesis test failed. Check the logs for more details" && \ + exit 1 END \ No newline at end of file diff --git a/catalyst-gateway/tests/schemathesis_tests/blueprint.cue b/catalyst-gateway/tests/schemathesis_tests/blueprint.cue index ab7bb81baf9..134e32aaa5e 100644 --- a/catalyst-gateway/tests/schemathesis_tests/blueprint.cue +++ b/catalyst-gateway/tests/schemathesis_tests/blueprint.cue @@ -4,6 +4,7 @@ project: { ci: { targets: { "test-fuzzer-api": privileged: true + "nightly-test-fuzzer-api": privileged: true } } }