Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: improve actors allocated metric #1812

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 1 addition & 8 deletions packages/services/pegboard/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,10 @@ lazy_static::lazy_static! {
*REGISTRY
).unwrap();

pub static ref CLIENT_LAST_PING: IntGaugeVec = register_int_gauge_vec_with_registry!(
"pegboard_client_last_ping",
"Last client ping timestamp, in ms.",
&["client_id"],
*REGISTRY
).unwrap();

pub static ref CLIENT_ACTORS_ALLOCATED: IntGaugeVec = register_int_gauge_vec_with_registry!(
"pegboard_client_actors_allocated",
"Total actors allocated on a client.",
&["client_id"],
&["datacenter_id", "client_id", "flavor", "inactive"],
*REGISTRY
).unwrap();
}
64 changes: 34 additions & 30 deletions packages/services/pegboard/standalone/metrics-publish/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
use chirp_workflow::prelude::*;
use pegboard::protocol::ClientFlavor;

/// How long to continue including a row in the query after a client has been deleted. This is so we can add
/// `inactive=true` for the prometheus metric.
const INACTIVE_THRESHOLD_MS: i64 = util::duration::hours(1);

pub async fn start(config: rivet_config::Config, pools: rivet_pools::Pools) -> GlobalResult<()> {
let mut interval = tokio::time::interval(std::time::Duration::from_secs(15));
loop {
interval.tick().await;

run_from_env(config.clone(), pools.clone()).await?;
run_from_env(config.clone(), pools.clone(), util::timestamp::now()).await?;
}
}

#[tracing::instrument(skip_all)]
pub async fn run_from_env(
config: rivet_config::Config,
pools: rivet_pools::Pools,
ts: i64,
) -> GlobalResult<()> {
let client =
chirp_client::SharedClient::from_env(pools.clone())?.wrap_new("pegboard-metrics-publish");
Expand All @@ -25,38 +31,36 @@ pub async fn run_from_env(
)
.await?;

let (client_ping, client_actors) = tokio::try_join!(
sql_fetch_all!(
[ctx, (Uuid, i64)]
"
SELECT client_id, last_ping_ts
FROM db_pegboard.clients AS OF SYSTEM TIME '-1s'
WHERE delete_ts IS NULL
",
),
sql_fetch_all!(
[ctx, (Uuid, i64)]
"
SELECT a.client_id, COUNT(*)
FROM db_pegboard.actors AS a
JOIN db_pegboard.clients AS c
ON a.client_id = c.client_id
AS OF SYSTEM TIME '-1s'
WHERE c.delete_ts IS NULL
GROUP BY a.client_id
",
),
)?;
let client_actors = sql_fetch_all!(
[ctx, (Uuid, Uuid, i64, bool, i64)]
"
SELECT
c.datacenter_id,
c.client_id,
c.flavor,
(c.drain_ts IS NOT NULL OR c.delete_ts IS NOT NULL) AS inactive,
COUNT(a.client_id)
FROM db_pegboard.clients AS c
LEFT JOIN db_pegboard.actors AS a
ON a.client_id = c.client_id
AS OF SYSTEM TIME '-1s'
WHERE c.delete_ts IS NULL OR c.delete_ts > $1
GROUP BY c.client_id
",
ts - INACTIVE_THRESHOLD_MS,
)
.await?;

for (client_id, last_ping_ts) in client_ping {
pegboard::metrics::CLIENT_LAST_PING
.with_label_values(&[&client_id.to_string()])
.set(last_ping_ts);
}
for (datacenter_id, client_id, flavor, inactive, count) in client_actors {
let flavor = unwrap!(ClientFlavor::from_repr(flavor.try_into()?));

for (client_id, count) in client_actors {
pegboard::metrics::CLIENT_ACTORS_ALLOCATED
.with_label_values(&[&client_id.to_string()])
.with_label_values(&[
&datacenter_id.to_string(),
&client_id.to_string(),
&flavor.to_string(),
&inactive.to_string(),
])
.set(count.try_into()?);
}

Expand Down
Loading