From 6ee501d7fead5cf184bb8c20018010d3f3b70e4d Mon Sep 17 00:00:00 2001 From: Denny Pradipta Date: Fri, 27 Dec 2024 18:09:44 +0700 Subject: [PATCH 1/8] Feat: Improve Prometheus Metrics --- docs/src/pages/guides/cli-options.md | 18 +- src/components/config/get.ts | 27 ++- src/components/probe/prober/http/index.ts | 13 ++ src/components/probe/prober/index.ts | 6 + src/events/index.ts | 6 + src/loaders/index.ts | 2 + src/looper/index.ts | 2 +- src/plugins/metrics/prometheus/collector.ts | 191 ++++++++++++-------- 8 files changed, 178 insertions(+), 87 deletions(-) diff --git a/docs/src/pages/guides/cli-options.md b/docs/src/pages/guides/cli-options.md index f4f0532f0..a0ae43f76 100644 --- a/docs/src/pages/guides/cli-options.md +++ b/docs/src/pages/guides/cli-options.md @@ -245,13 +245,17 @@ Then you can scrape the metrics from `http://localhost:3001/metrics`. Monika exposes [Prometheus default metrics](https://prometheus.io/docs/instrumenting/writing_clientlibs/#standard-and-runtime-collectors), [Node.js specific metrics](https://github.com/siimon/prom-client/tree/master/lib/metrics), and Monika probe metrics below. 
-| Metric Name | Type | Purpose | Label | -| -------------------------------------- | --------- | -------------------------------------------- | ------------------------------------------- | -| `monika_probes_total` | Gauge | Collect total probe | - | -| `monika_request_status_code_info` | Gauge | Collect HTTP status code | `id`, `name`, `url`, `method` | -| `monika_request_response_time_seconds` | Histogram | Collect duration of probe request in seconds | `id`, `name`, `url`, `method`, `statusCode` | -| `monika_request_response_size_bytes` | Gauge | Collect size of response size in bytes | `id`, `name`, `url`, `method`, `statusCode` | -| `monika_alert_total` | Counter | Collect total alert triggered | `id`, `name`, `url`, `method`, `alertQuery` | +| Metric Name | Type | Purpose | Labels | +| -------------------------------------- | --------- | --------------------------------------------------------- | ----------------------------------------------------- | +| `monika_alerts_triggered` | Counter | Collect count of alerts triggered by a probe | `id`, `name`, `url`, `method`, `alertQuery` | +| `monika_alerts_triggered_total` | Counter | Collect total count of alerts triggered | - | +| `monika_probes_running` | Gauge | Indicates whether a probe is running (1) or idle (0) | `id` | +| `monika_probes_running_total` | Gauge | Collect total count of probes currently running checks | - | +| `monika_probes_status` | Gauge | Indicates the current status of a probe: 0 = DOWN, 1 = UP | `id`, `name`, `url`, `method` | +| `monika_probes_total` | Gauge | Collect total number of probes configured | - | +| `monika_request_response_size_bytes` | Gauge | Collect size of the response in bytes | `id`, `name`, `url`, `method`, `statusCode`, `result` | +| `monika_request_response_time_seconds` | Histogram | Collect duration of probe request in seconds | `id`, `name`, `url`, `method`, `statusCode`, `result` | +| `monika_request_status_code_info` | Gauge | Collect HTTP status code of the 
probe request | `id`, `name`, `url`, `method` | ## Repeat diff --git a/src/components/config/get.ts b/src/components/config/get.ts index b63a2ca57..548a0d68c 100644 --- a/src/components/config/get.ts +++ b/src/components/config/get.ts @@ -22,6 +22,7 @@ * SOFTWARE. * **********************************************************************************/ +import { randomUUID } from 'node:crypto' import { getContext } from '../../context' import type { Config } from '../../interfaces/config' import { log } from '../../utils/pino' @@ -41,7 +42,10 @@ export async function getRawConfig(): Promise { return addDefaultNotifications(config) } - return config + // Add default alerts for Probe not Accessible + const finalizedConfig = addDefaultAlerts(config) + + return finalizedConfig } // mergeConfigs merges configs by overwriting each other @@ -82,6 +86,27 @@ async function parseNativeConfig(): Promise { ) } +export const FAILED_REQUEST_ASSERTION = { + assertion: '', + message: 'Probe not accessible', +} + +function addDefaultAlerts(config: Config) { + return { + ...config, + probes: config.probes.map((probe) => ({ + ...probe, + alerts: [ + ...(probe.alerts || []), + { + id: randomUUID(), + ...FAILED_REQUEST_ASSERTION, + }, + ], + })), + } +} + async function parseNonNativeConfig(): Promise { const { flags } = getContext() const hasNonNativeConfig = diff --git a/src/components/probe/prober/http/index.ts b/src/components/probe/prober/http/index.ts index cf10de425..19312977b 100644 --- a/src/components/probe/prober/http/index.ts +++ b/src/components/probe/prober/http/index.ts @@ -159,6 +159,12 @@ export class HTTPProber extends BaseProber { response, }) + getEventEmitter().emit(events.probe.status.changed, { + probe: this.probeConfig, + requestIndex, + status: 'up', + }) + this.logMessage( true, getProbeResultMessage({ @@ -226,6 +232,13 @@ export class HTTPProber extends BaseProber { } const alertId = getAlertID(url, validation, probeID) + console.log(this.probeConfig) + 
getEventEmitter().emit(events.probe.status.changed, { + probe: this.probeConfig, + requestIndex, + status: 'down', + }) + getEventEmitter().emit(events.probe.alert.triggered, { probe: this.probeConfig, requestIndex, diff --git a/src/components/probe/prober/index.ts b/src/components/probe/prober/index.ts index b6731a175..1c24c1129 100644 --- a/src/components/probe/prober/index.ts +++ b/src/components/probe/prober/index.ts @@ -134,6 +134,7 @@ export abstract class BaseProber implements Prober { // this probe is definitely in incident state because of fail assertion, so send notification, etc. this.handleFailedProbe(probeResults) + return } @@ -148,6 +149,11 @@ export abstract class BaseProber implements Prober { requestIndex: index, response: requestResponse, }) + getEventEmitter().emit(events.probe.status.changed, { + probe: this.probeConfig, + requestIndex: index, + status: 'up', + }) logResponseTime(requestResponse.responseTime) if ( diff --git a/src/events/index.ts b/src/events/index.ts index 2bec870c5..f698e8049 100644 --- a/src/events/index.ts +++ b/src/events/index.ts @@ -34,6 +34,9 @@ export default { sanitized: 'CONFIG_SANITIZED', updated: 'CONFIG_UPDATED', }, + notifications: { + sent: 'NOTIFICATIONS_SENT', + }, probe: { alert: { triggered: 'PROBE_ALERT_TRIGGERED', @@ -46,5 +49,8 @@ export default { notification: { willSend: 'PROBE_NOTIFICATION_WILL_SEND', }, + status: { + changed: 'PROBE_STATUS_CHANGED', + }, }, } diff --git a/src/loaders/index.ts b/src/loaders/index.ts index 3f079b351..63d639196 100644 --- a/src/loaders/index.ts +++ b/src/loaders/index.ts @@ -82,6 +82,7 @@ function initPrometheus(prometheusPort: number) { decrementProbeRunningTotal, incrementProbeRunningTotal, resetProbeRunningTotal, + collectProbeStatus, } = new PrometheusCollector() // collect prometheus metrics @@ -93,6 +94,7 @@ function initPrometheus(prometheusPort: number) { eventEmitter.on(events.probe.ran, incrementProbeRunningTotal) eventEmitter.on(events.probe.finished, 
decrementProbeRunningTotal) eventEmitter.on(events.config.updated, resetProbeRunningTotal) + eventEmitter.on(events.probe.status.changed, collectProbeStatus) startPrometheusMetricsServer(prometheusPort) } diff --git a/src/looper/index.ts b/src/looper/index.ts index b2c04b350..3051e46e3 100644 --- a/src/looper/index.ts +++ b/src/looper/index.ts @@ -69,7 +69,7 @@ export function sanitizeProbe(isSymonMode: boolean, probe: Probe): Probe { ...probe, incidentThreshold: incidentThreshold || DEFAULT_INCIDENT_THRESHOLD, recoveryThreshold: recoveryThreshold || DEFAULT_RECOVERY_THRESHOLD, - alerts: isSymonMode ? [] : addFailedRequestAssertions(alerts), + alerts: addFailedRequestAssertions(alerts), } } diff --git a/src/plugins/metrics/prometheus/collector.ts b/src/plugins/metrics/prometheus/collector.ts index e4e47dc6f..8a2130c88 100644 --- a/src/plugins/metrics/prometheus/collector.ts +++ b/src/plugins/metrics/prometheus/collector.ts @@ -35,15 +35,17 @@ import type { ProbeRequestResponse } from '../../../interfaces/request' type PrometheusCustomCollector = { statusCode: Gauge<'id' | 'name' | 'url' | 'method'> - probeResult: Gauge<'id' | 'name' | 'url' | 'method'> - probeRunningTotal: Gauge<'id'> responseTime: Histogram< 'id' | 'name' | 'url' | 'method' | 'statusCode' | 'result' > responseSize: Gauge< 'id' | 'name' | 'url' | 'method' | 'statusCode' | 'result' > - alertTriggeredTotal: Counter<'id' | 'name' | 'url' | 'method' | 'alertQuery'> + alertsTriggered: Counter<'id' | 'name' | 'url' | 'method' | 'alertQuery'> + alertsTriggeredTotal: Counter + probesStatus: Gauge<'id' | 'name' | 'url' | 'method'> + probesRunningTotal: Gauge<'id'> + probesRunning: Gauge probesTotal: Gauge } @@ -61,19 +63,36 @@ export class PrometheusCollector { register.clear() // register metric collector - const statusCode = new Gauge({ - name: 'monika_request_status_code_info', - help: 'HTTP status code', - labelNames: ['id', 'name', 'url', 'method'] as const, + const alertsTriggered = new Counter({ + 
name: 'monika_alerts_triggered', + help: 'Indicates the count of alerts triggered by a probe', + labelNames: ['id', 'name', 'url', 'method', 'alertQuery'] as const, + }) + const alertsTriggeredTotal = new Counter({ + name: 'monika_alerts_triggered_total', + help: 'Indicates the count of total alert triggered of all probes', + }) + const probesRunning = new Gauge({ + name: 'monika_probes_running', + help: 'Indicates whether a Monika probe is actively running checks (1 = RUNNING) or idle (0 = IDLE).', + labelNames: ['id'] as const, + }) + const probesRunningTotal = new Gauge({ + name: 'monika_probes_running_total', + help: 'Indicates the total count of probes that are currently running checks', }) - const probeResult = new Gauge({ - name: 'monika_probe_result', - help: 'Probe result: -1=unknown, 0=failed, 1=success', + const probesStatus = new Gauge({ + name: 'monika_probes_status', + help: 'Indicates the current status of the probe: 0 = DOWN (unreachable), 1 = UP (reachable).', labelNames: ['id', 'name', 'url', 'method'] as const, }) - const responseTime = new Histogram({ - name: 'monika_request_response_time_seconds', - help: 'Duration of probe request in seconds', + const probesTotal = new Gauge({ + name: 'monika_probes_total', + help: 'Total count of all probes configured', + }) + const responseSize = new Gauge({ + name: 'monika_request_response_size_bytes', + help: "Indicates the size of probe request's response size in bytes", labelNames: [ 'id', 'name', @@ -83,9 +102,9 @@ export class PrometheusCollector { 'result', ] as const, }) - const responseSize = new Gauge({ - name: 'monika_request_response_size_bytes', - help: 'Size of response size in bytes', + const responseTime = new Histogram({ + name: 'monika_request_response_time_seconds', + help: 'Indicates the duration of the probe request in seconds', labelNames: [ 'id', 'name', @@ -95,67 +114,28 @@ export class PrometheusCollector { 'result', ] as const, }) - const alertTriggeredTotal = new Counter({ - name: 
'monika_alert_total', - help: 'Total alert triggered', - labelNames: ['id', 'name', 'url', 'method', 'alertQuery'] as const, - }) - const probeRunningTotal = new Gauge({ - name: 'monika_probe_running_total', - help: 'Total of probe running', - labelNames: ['id'] as const, - }) - const probesTotal = new Gauge({ - name: 'monika_probes_total', - help: 'Total of all probe', + const statusCode = new Gauge({ + name: 'monika_request_status_code_info', + help: 'Indicates the HTTP status code of the probe request', + labelNames: ['id', 'name', 'url', 'method'] as const, }) // register and collect default Node.js metrics collectDefaultMetrics({ register }) prometheusCustomCollector = { - statusCode, - probeResult, - responseTime, - responseSize, - alertTriggeredTotal, - probeRunningTotal, + alertsTriggered, + alertsTriggeredTotal, + probesRunningTotal, + probesRunning, + probesStatus, probesTotal, + responseSize, + responseTime, + statusCode, } } - collectProbeTotal(total: number): void { - if (!prometheusCustomCollector) { - throw new Error('Prometheus collector is not registered') - } - - prometheusCustomCollector.probesTotal.set(total) - } - - decrementProbeRunningTotal(id: string) { - if (!prometheusCustomCollector) { - throw new Error('Prometheus collector is not registered') - } - - prometheusCustomCollector.probeRunningTotal.labels(id).dec() - } - - incrementProbeRunningTotal(id: string) { - if (!prometheusCustomCollector) { - throw new Error('Prometheus collector is not registered') - } - - prometheusCustomCollector.probeRunningTotal.labels(id).inc() - } - - resetProbeRunningTotal() { - if (!prometheusCustomCollector) { - throw new Error('Prometheus collector is not registered') - } - - prometheusCustomCollector.probeRunningTotal.reset() - } - collectProbeRequestMetrics(probeResult: ProbeResult): void { if (!prometheusCustomCollector) { throw new Error('Prometheus collector is not registered') @@ -188,7 +168,6 @@ export class PrometheusCollector { } const { 
statusCode, - probeResult: probeResultCollector, responseTime: resposeTimeCollector, responseSize, } = prometheusCustomCollector @@ -202,18 +181,46 @@ export class PrometheusCollector { method: method ?? 'GET', }) .set(status) - probeResultCollector - ?.labels({ - id, - name, - url, - method: method ?? 'GET', - }) - .set(result) resposeTimeCollector?.labels(labels).observe(responseTimeInSecond) responseSize?.labels(labels).set(responseSizeBytes || 0) } + collectProbeStatus( + probeResult: { status: 'up' | 'down' } & Omit + ): void { + if (!prometheusCustomCollector) { + throw new Error('Prometheus collector is not registered') + } + + const { probe, requestIndex, status } = probeResult + const { id, name, requests } = probe + + if (!requests || requests.length === 0) { + return + } + + const request = requests[requestIndex] + const { method, url } = request + const labels = { + id, + name, + url, + method: method ?? 'GET', + } + const { probesStatus } = prometheusCustomCollector + + // collect metrics + probesStatus?.labels(labels).set(status === 'up' ? 1 : 0) + } + + collectProbeTotal(total: number): void { + if (!prometheusCustomCollector) { + throw new Error('Prometheus collector is not registered') + } + + prometheusCustomCollector.probesTotal.set(total) + } + collectTriggeredAlert( probeResult: { alertQuery: string } & Omit ): void { @@ -237,9 +244,37 @@ export class PrometheusCollector { method: method ?? 
'GET', alertQuery, } - const { alertTriggeredTotal } = prometheusCustomCollector + const { alertsTriggered, alertsTriggeredTotal } = prometheusCustomCollector // collect metrics - alertTriggeredTotal?.labels(labels).inc() + alertsTriggered?.labels(labels).inc() + alertsTriggeredTotal?.inc() + } + + decrementProbeRunningTotal(id: string) { + if (!prometheusCustomCollector) { + throw new Error('Prometheus collector is not registered') + } + + prometheusCustomCollector.probesRunning.labels(id).dec() + prometheusCustomCollector.probesRunningTotal.dec() + } + + incrementProbeRunningTotal(id: string) { + if (!prometheusCustomCollector) { + throw new Error('Prometheus collector is not registered') + } + + prometheusCustomCollector.probesRunning.labels(id).inc() + prometheusCustomCollector.probesRunningTotal.inc() + } + + resetProbeRunningTotal() { + if (!prometheusCustomCollector) { + throw new Error('Prometheus collector is not registered') + } + + prometheusCustomCollector.probesRunning.reset() + prometheusCustomCollector.probesRunningTotal.reset() } } From f741c4d546e71b3c1128d17078f0255e301af1d1 Mon Sep 17 00:00:00 2001 From: Denny Pradipta Date: Fri, 27 Dec 2024 18:18:55 +0700 Subject: [PATCH 2/8] Remove console.log --- src/components/probe/prober/http/index.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/components/probe/prober/http/index.ts b/src/components/probe/prober/http/index.ts index 19312977b..4eae85f1a 100644 --- a/src/components/probe/prober/http/index.ts +++ b/src/components/probe/prober/http/index.ts @@ -232,7 +232,6 @@ export class HTTPProber extends BaseProber { } const alertId = getAlertID(url, validation, probeID) - console.log(this.probeConfig) getEventEmitter().emit(events.probe.status.changed, { probe: this.probeConfig, requestIndex, From 2f08a60a365aa9a7d942b9add490f663185ceeb5 Mon Sep 17 00:00:00 2001 From: Denny Pradipta Date: Fri, 27 Dec 2024 18:20:09 +0700 Subject: [PATCH 3/8] Fix test --- src/looper/index.ts | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/src/looper/index.ts b/src/looper/index.ts index 3051e46e3..b2c04b350 100644 --- a/src/looper/index.ts +++ b/src/looper/index.ts @@ -69,7 +69,7 @@ export function sanitizeProbe(isSymonMode: boolean, probe: Probe): Probe { ...probe, incidentThreshold: incidentThreshold || DEFAULT_INCIDENT_THRESHOLD, recoveryThreshold: recoveryThreshold || DEFAULT_RECOVERY_THRESHOLD, - alerts: addFailedRequestAssertions(alerts), + alerts: isSymonMode ? [] : addFailedRequestAssertions(alerts), } } From e77b4d327358f542a971ca6101894e7fd6870ec0 Mon Sep 17 00:00:00 2001 From: Denny Pradipta Date: Fri, 3 Jan 2025 13:56:37 +0700 Subject: [PATCH 4/8] Fix broken test --- src/components/config/get.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/config/get.ts b/src/components/config/get.ts index 548a0d68c..cfeeb050a 100644 --- a/src/components/config/get.ts +++ b/src/components/config/get.ts @@ -94,7 +94,7 @@ export const FAILED_REQUEST_ASSERTION = { function addDefaultAlerts(config: Config) { return { ...config, - probes: config.probes.map((probe) => ({ + probes: config.probes?.map((probe) => ({ ...probe, alerts: [ ...(probe.alerts || []), From b686ecc65bce577128d76a7f68f9348c31a23ae7 Mon Sep 17 00:00:00 2001 From: Denny Pradipta Date: Fri, 3 Jan 2025 17:45:20 +0700 Subject: [PATCH 5/8] Feat: Updated Metrics --- docs/src/pages/guides/cli-options.md | 24 +++++------ packages/notification/index.ts | 16 +++++--- src/components/notification/index.ts | 10 ++++- src/components/probe/prober/http/index.ts | 2 +- src/loaders/index.ts | 2 + src/plugins/metrics/prometheus/collector.ts | 44 ++++++++++++++++++--- src/plugins/metrics/prometheus/publisher.ts | 5 ++- 7 files changed, 77 insertions(+), 26 deletions(-) diff --git a/docs/src/pages/guides/cli-options.md b/docs/src/pages/guides/cli-options.md index a0ae43f76..6126a805e 100644 --- a/docs/src/pages/guides/cli-options.md +++ b/docs/src/pages/guides/cli-options.md @@ -245,17 
+245,19 @@ Then you can scrape the metrics from `http://localhost:3001/metrics`. Monika exposes [Prometheus default metrics](https://prometheus.io/docs/instrumenting/writing_clientlibs/#standard-and-runtime-collectors), [Node.js specific metrics](https://github.com/siimon/prom-client/tree/master/lib/metrics), and Monika probe metrics below. -| Metric Name | Type | Purpose | Labels | -| -------------------------------------- | --------- | --------------------------------------------------------- | ----------------------------------------------------- | -| `monika_alerts_triggered` | Counter | Collect count of alerts triggered by a probe | `id`, `name`, `url`, `method`, `alertQuery` | -| `monika_alerts_triggered_total` | Counter | Collect total count of alerts triggered | - | -| `monika_probes_running` | Gauge | Indicates whether a probe is running (1) or idle (0) | `id` | -| `monika_probes_running_total` | Gauge | Collect total count of probes currently running checks | - | -| `monika_probes_status` | Gauge | Indicates the current status of a probe: 0 = DOWN, 1 = UP | `id`, `name`, `url`, `method` | -| `monika_probes_total` | Gauge | Collect total number of probes configured | - | -| `monika_request_response_size_bytes` | Gauge | Collect size of the response in bytes | `id`, `name`, `url`, `method`, `statusCode`, `result` | -| `monika_request_response_time_seconds` | Histogram | Collect duration of probe request in seconds | `id`, `name`, `url`, `method`, `statusCode`, `result` | -| `monika_request_status_code_info` | Gauge | Collect HTTP status code of the probe request | `id`, `name`, `url`, `method` | +| Metric Name | Type | Purpose | Labels | +| -------------------------------------- | --------- | --------------------------------------------------------------------- | ----------------------------------------------------- | +| `monika_alerts_triggered` | Counter | Indicates the count of alerts triggered | `id`, `name`, `url`, `method`, `alertQuery` | +| 
`monika_alerts_triggered_total` | Counter | Indicates the cumulative count of alerts triggered | - | +| `monika_probes_running` | Gauge | Indicates whether a probe is running (1) or idle (0) | `id` | +| `monika_probes_running_total` | Gauge | Indicates the total count of probes that are currently running checks | - | +| `monika_probes_status` | Gauge | Indicates whether a probe is healthy (1) or is having an incident (0) | `id`, `name`, `url`, `method` | +| `monika_probes_total` | Gauge | Total count of all probes configured | - | +| `monika_request_response_size_bytes` | Gauge | Indicates the size of probe request's response in bytes | `id`, `name`, `url`, `method`, `statusCode`, `result` | +| `monika_request_response_time_seconds` | Histogram | Indicates the duration of the probe request in seconds | `id`, `name`, `url`, `method`, `statusCode`, `result` | +| `monika_request_status_code_info` | Gauge | Indicates the HTTP status code of the probe requests' response(s) | `id`, `name`, `url`, `method` | +| `monika_notifications_triggered` | Counter | Indicates the count of notifications triggered | `type`, `status` | +| `monika_notifications_triggered_total` | Counter | Indicates the cumulative count of notifications triggered | - | ## Repeat diff --git a/packages/notification/index.ts b/packages/notification/index.ts index 2876c0a9f..757551341 100644 --- a/packages/notification/index.ts +++ b/packages/notification/index.ts @@ -34,29 +34,33 @@ async function sendNotifications( notifications: Notification[], message: NotificationMessage, sender?: InputSender -): Promise { +): Promise<{ type: string; success: boolean }[]> { if (sender) { updateSender(sender) } - await Promise.all( + // Map notifications to an array of results + const results = await Promise.all( notifications.map(async ({ data, type }) => { const channel = channels[type] - try { if (!channel) { throw new Error('Notification channel is not available') } await channel.send(data, message) + return { type, 
success: true } } catch (error: unknown) { - const message = getErrorMessage(error) - throw new Error( - `Failed to send message using ${type}, please check your ${type} notification config.\nMessage: ${message}` + const errorMessage = getErrorMessage(error) + console.error( + `Failed to send message using ${type}, please check your ${type} notification config.\nMessage: ${errorMessage}` ) + return { type, success: false } } }) ) + + return results } export { sendNotifications } diff --git a/src/components/notification/index.ts b/src/components/notification/index.ts index 0a4564f24..d1ae26fe4 100644 --- a/src/components/notification/index.ts +++ b/src/components/notification/index.ts @@ -22,11 +22,13 @@ * SOFTWARE. * **********************************************************************************/ +import { getEventEmitter } from '../../utils/events' import { ValidatedResponse } from '../../plugins/validate-response' import getIp from '../../utils/ip' import { getMessageForAlert } from './alert-message' import { sendNotifications } from '@hyperjumptech/monika-notification' import type { Notification } from '@hyperjumptech/monika-notification' +import events from '../../events' type SendAlertsProps = { probeID: string @@ -54,5 +56,11 @@ export async function sendAlerts({ response: validation.response, }) - return sendNotifications(notifications, message) + const results = await sendNotifications(notifications, message) + for (const result of results) { + getEventEmitter().emit(events.notifications.sent, { + type: result.type, + status: result.success ? 
'success' : 'failed', + }) + } } diff --git a/src/components/probe/prober/http/index.ts b/src/components/probe/prober/http/index.ts index 4eae85f1a..0155ade56 100644 --- a/src/components/probe/prober/http/index.ts +++ b/src/components/probe/prober/http/index.ts @@ -241,7 +241,7 @@ export class HTTPProber extends BaseProber { getEventEmitter().emit(events.probe.alert.triggered, { probe: this.probeConfig, requestIndex, - alertQuery: '', + alertQuery: triggeredAlert, }) addIncident({ diff --git a/src/loaders/index.ts b/src/loaders/index.ts index 63d639196..395b33974 100644 --- a/src/loaders/index.ts +++ b/src/loaders/index.ts @@ -83,6 +83,7 @@ function initPrometheus(prometheusPort: number) { incrementProbeRunningTotal, resetProbeRunningTotal, collectProbeStatus, + collectNotificationSentMetrics, } = new PrometheusCollector() // collect prometheus metrics @@ -95,6 +96,7 @@ function initPrometheus(prometheusPort: number) { eventEmitter.on(events.probe.finished, decrementProbeRunningTotal) eventEmitter.on(events.config.updated, resetProbeRunningTotal) eventEmitter.on(events.probe.status.changed, collectProbeStatus) + eventEmitter.on(events.notifications.sent, collectNotificationSentMetrics) startPrometheusMetricsServer(prometheusPort) } diff --git a/src/plugins/metrics/prometheus/collector.ts b/src/plugins/metrics/prometheus/collector.ts index 8a2130c88..e944a8c30 100644 --- a/src/plugins/metrics/prometheus/collector.ts +++ b/src/plugins/metrics/prometheus/collector.ts @@ -47,6 +47,8 @@ type PrometheusCustomCollector = { probesRunningTotal: Gauge<'id'> probesRunning: Gauge probesTotal: Gauge + notificationsTriggered: Counter<'type' | 'status'> + notificationsTriggeredTotal: Counter } type ProbeResult = { @@ -65,16 +67,16 @@ export class PrometheusCollector { // register metric collector const alertsTriggered = new Counter({ name: 'monika_alerts_triggered', - help: 'Indicates the count of alerts triggered by a probe', + help: 'Indicates the count of alerts triggered', 
labelNames: ['id', 'name', 'url', 'method', 'alertQuery'] as const, }) const alertsTriggeredTotal = new Counter({ name: 'monika_alerts_triggered_total', - help: 'Indicates the count of total alert triggered of all probes', + help: 'Indicates the cumulative count of alerts triggered', }) const probesRunning = new Gauge({ name: 'monika_probes_running', - help: 'Indicates whether a Monika probe is actively running checks (1 = RUNNING) or idle (0 = IDLE).', + help: 'Indicates whether a a probe is running (1) or idle (0)', labelNames: ['id'] as const, }) const probesRunningTotal = new Gauge({ @@ -83,7 +85,7 @@ export class PrometheusCollector { }) const probesStatus = new Gauge({ name: 'monika_probes_status', - help: 'Indicates the current status of the probe: 0 = DOWN (unreachable), 1 = UP (reachable).', + help: 'Indicates whether a probe is healthy (1) or is having an incident (0)', labelNames: ['id', 'name', 'url', 'method'] as const, }) const probesTotal = new Gauge({ @@ -116,9 +118,18 @@ export class PrometheusCollector { }) const statusCode = new Gauge({ name: 'monika_request_status_code_info', - help: 'Indicates the HTTP status code of the probe request', + help: "Indicates the HTTP status code of the probe requests' response(s)", labelNames: ['id', 'name', 'url', 'method'] as const, }) + const notificationsTriggered = new Counter({ + name: 'monika_notifications_triggered', + help: 'Indicates the count of notifications triggered', + labelNames: ['type', 'status'] as const, + }) + const notificationsTriggeredTotal = new Counter({ + name: 'monika_notifications_triggered_total', + help: 'Indicates the cumulative count of notifications triggered', + }) // register and collect default Node.js metrics collectDefaultMetrics({ register }) @@ -133,6 +144,8 @@ export class PrometheusCollector { responseSize, responseTime, statusCode, + notificationsTriggered, + notificationsTriggeredTotal, } } @@ -242,7 +255,7 @@ export class PrometheusCollector { name, url, method: method 
?? 'GET', - alertQuery, + alertQuery: JSON.stringify(alertQuery), } const { alertsTriggered, alertsTriggeredTotal } = prometheusCustomCollector @@ -277,4 +290,23 @@ export class PrometheusCollector { prometheusCustomCollector.probesRunning.reset() prometheusCustomCollector.probesRunningTotal.reset() } + + collectNotificationSentMetrics({ + type, + status, + }: { + type: string + status: 'success' | 'failed' + }) { + if (!prometheusCustomCollector) { + throw new Error('Prometheus collector is not registered') + } + + const { notificationsTriggered, notificationsTriggeredTotal } = + prometheusCustomCollector + + // collect metrics + notificationsTriggered.labels({ type, status }).inc() + notificationsTriggeredTotal.inc() + } } diff --git a/src/plugins/metrics/prometheus/publisher.ts b/src/plugins/metrics/prometheus/publisher.ts index 59ab5bcd5..01ae5fe5b 100644 --- a/src/plugins/metrics/prometheus/publisher.ts +++ b/src/plugins/metrics/prometheus/publisher.ts @@ -45,7 +45,10 @@ export function startPrometheusMetricsServer(port: number): void { try { const prometheusMetrics = await register.metrics() - res.status(200).end(prometheusMetrics) + res + .status(200) + .header('Content-Type', register.contentType) + .end(prometheusMetrics) } catch (error: unknown) { res.status(500).json({ message: getErrorMessage(error) }) } From c01aaf7df1f7a7ed29ad5a0d40f4aac6055defc7 Mon Sep 17 00:00:00 2001 From: Denny Pradipta Date: Fri, 24 Jan 2025 10:39:10 +0700 Subject: [PATCH 6/8] Add docs for default metrics --- docs/src/pages/guides/cli-options.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/src/pages/guides/cli-options.md b/docs/src/pages/guides/cli-options.md index 6126a805e..1cd8f86d2 100644 --- a/docs/src/pages/guides/cli-options.md +++ b/docs/src/pages/guides/cli-options.md @@ -259,6 +259,8 @@ Monika exposes [Prometheus default metrics](https://prometheus.io/docs/instrumen | `monika_notifications_triggered` | Counter | Indicates the count of notifications 
triggered | `type`, `status` | | `monika_notifications_triggered_total` | Counter | Indicates the cumulative count of notifications triggered | - | +Aside from the above metrics, Monika also exposes [Prometheus default metrics](https://prometheus.io/docs/instrumenting/writing_clientlibs/#standard-and-runtime-collectors) and [Node.js specific metrics](https://github.com/siimon/prom-client/tree/master/lib/metrics) + ## Repeat By default monika will continuously loop through all your probes in the configuration. To loop for a specific number of repeats use the `-r` or `--repeat` flags followed by a number. For example to repeat only 3 times type the command below: From f91a9f413832dd2a5d87c55daca12248ba9200dc Mon Sep 17 00:00:00 2001 From: Denny Pradipta Date: Fri, 24 Jan 2025 10:41:50 +0700 Subject: [PATCH 7/8] Clarify docs --- docs/src/pages/guides/cli-options.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/src/pages/guides/cli-options.md b/docs/src/pages/guides/cli-options.md index 1cd8f86d2..5d707f4ad 100644 --- a/docs/src/pages/guides/cli-options.md +++ b/docs/src/pages/guides/cli-options.md @@ -245,19 +245,19 @@ Then you can scrape the metrics from `http://localhost:3001/metrics`. Monika exposes [Prometheus default metrics](https://prometheus.io/docs/instrumenting/writing_clientlibs/#standard-and-runtime-collectors), [Node.js specific metrics](https://github.com/siimon/prom-client/tree/master/lib/metrics), and Monika probe metrics below. 
-| Metric Name | Type | Purpose | Labels | -| -------------------------------------- | --------- | --------------------------------------------------------------------- | ----------------------------------------------------- | -| `monika_alerts_triggered` | Counter | Indicates the count of alerts triggered | `id`, `name`, `url`, `method`, `alertQuery` | -| `monika_alerts_triggered_total` | Counter | Indicates the cumulative count of alerts triggered | - | -| `monika_probes_running` | Gauge | Indicates whether a probe is running (1) or idle (0) | `id` | -| `monika_probes_running_total` | Gauge | Indicates the total count of probes that are currently running checks | - | -| `monika_probes_status` | Gauge | Indicates whether a probe is healthy (1) or is having an incident (0) | `id`, `name`, `url`, `method` | -| `monika_probes_total` | Gauge | Total count of all probes configured | - | -| `monika_request_response_size_bytes` | Gauge | Indicates the size of probe request's response in bytes | `id`, `name`, `url`, `method`, `statusCode`, `result` | -| `monika_request_response_time_seconds` | Histogram | Indicates the duration of the probe request in seconds | `id`, `name`, `url`, `method`, `statusCode`, `result` | -| `monika_request_status_code_info` | Gauge | Indicates the HTTP status code of the probe requests' response(s) | `id`, `name`, `url`, `method` | -| `monika_notifications_triggered` | Counter | Indicates the count of notifications triggered | `type`, `status` | -| `monika_notifications_triggered_total` | Counter | Indicates the cumulative count of notifications triggered | - | +| Metric Name | Type | Purpose | Labels | +| -------------------------------------- | --------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------- | +| `monika_alerts_triggered` | Counter | Indicates 
the count of incident alerts triggered | `id`, `name`, `url`, `method`, `alertQuery` | +| `monika_alerts_triggered_total` | Counter | Indicates the cumulative count of incident alerts triggered | - | +| `monika_probes_running` | Gauge | Indicates whether a probe is running (1) or idle (0). Running means the probe is currently sending requests, while idle means the probe is waiting for the next request to be sent. | `id` | +| `monika_probes_running_total` | Gauge | Indicates the total count of probes that are currently running. Running means the probe is currently sending requests. | - | +| `monika_probes_status` | Gauge | Indicates whether a probe is healthy (1) or is having an incident (0) | `id`, `name`, `url`, `method` | +| `monika_probes_total` | Gauge | Total count of all probes configured | - | +| `monika_request_response_size_bytes` | Gauge | Indicates the size of probe request's response in bytes | `id`, `name`, `url`, `method`, `statusCode`, `result` | +| `monika_request_response_time_seconds` | Histogram | Indicates the duration of the probe request in seconds | `id`, `name`, `url`, `method`, `statusCode`, `result` | +| `monika_request_status_code_info` | Gauge | Indicates the HTTP status code of the probe requests' response(s) | `id`, `name`, `url`, `method` | +| `monika_notifications_triggered` | Counter | Indicates the count of notifications triggered | `type`, `status` | +| `monika_notifications_triggered_total` | Counter | Indicates the cumulative count of notifications triggered | - | Aside from the above metrics, Monika also exposes [Prometheus default metrics](https://prometheus.io/docs/instrumenting/writing_clientlibs/#standard-and-runtime-collectors) and [Node.js specific metrics](https://github.com/siimon/prom-client/tree/master/lib/metrics) From a8ca3347b1594591762cfb842fdd7e5f6f6e1723 Mon Sep 17 00:00:00 2001 From: Denny Pradipta Date: Fri, 24 Jan 2025 15:28:50 +0700 Subject: [PATCH 8/8] Reverse the FAILED_ASSERTION_ALERT hacks ---
src/components/config/get.ts | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/src/components/config/get.ts b/src/components/config/get.ts index cfeeb050a..b63a2ca57 100644 --- a/src/components/config/get.ts +++ b/src/components/config/get.ts @@ -22,7 +22,6 @@ * SOFTWARE. * **********************************************************************************/ -import { randomUUID } from 'node:crypto' import { getContext } from '../../context' import type { Config } from '../../interfaces/config' import { log } from '../../utils/pino' @@ -42,10 +41,7 @@ export async function getRawConfig(): Promise { return addDefaultNotifications(config) } - // Add default alerts for Probe not Accessible - const finalizedConfig = addDefaultAlerts(config) - - return finalizedConfig + return config } // mergeConfigs merges configs by overwriting each other @@ -86,27 +82,6 @@ async function parseNativeConfig(): Promise { ) } -export const FAILED_REQUEST_ASSERTION = { - assertion: '', - message: 'Probe not accessible', -} - -function addDefaultAlerts(config: Config) { - return { - ...config, - probes: config.probes?.map((probe) => ({ - ...probe, - alerts: [ - ...(probe.alerts || []), - { - id: randomUUID(), - ...FAILED_REQUEST_ASSERTION, - }, - ], - })), - } -} - async function parseNonNativeConfig(): Promise { const { flags } = getContext() const hasNonNativeConfig =