Skip to content

Commit

Permalink
CLDSRV-520: display utilization service uptime as a time series
Browse files Browse the repository at this point in the history
  • Loading branch information
williamlardier committed Apr 17, 2024
1 parent 1ddad9d commit 54d36b3
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 60 deletions.
101 changes: 58 additions & 43 deletions monitoring/dashboard.json
Original file line number Diff line number Diff line change
Expand Up @@ -2701,60 +2701,68 @@
"error": false,
"fieldConfig": {
"defaults": {
"calcs": [
"mean"
],
"decimals": null,
"limit": null,
"links": [],
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"log": 2,
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"max": 100,
"min": 0,
"noValue": "-",
"override": {},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "#808080",
"color": "green",
"index": 0,
"line": true,
"op": "gt",
"value": "null",
"yaxis": "left"
},
{
"color": "red",
"index": 1,
"line": true,
"op": "gt",
"value": 0.0,
"yaxis": "left"
},
{
"color": "orange",
"index": 2,
"index": 1,
"line": true,
"op": "gt",
"value": 90.0,
"yaxis": "left"
},
{
"color": "green",
"index": 3,
"color": "red",
"index": 2,
"line": true,
"op": "gt",
"value": 95.0,
"value": 0.0,
"yaxis": "left"
}
]
},
"title": null,
"unit": "percent",
"values": false
"unit": "bool_on_off"
},
"showThresholdLabels": false,
"showThresholdMarkers": true
"overrides": []
},
"gridPos": {
"h": 8,
Expand All @@ -2767,10 +2775,13 @@
"links": [],
"maxDataPoints": 100,
"options": {
"reduceOptions": {
"calcs": [
"mean"
]
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
Expand All @@ -2782,7 +2793,7 @@
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "Online rate",
"legendFormat": "State",
"metric": "",
"refId": "",
"step": 10,
Expand All @@ -2792,7 +2803,7 @@
"title": "Quota service uptime",
"transformations": [],
"transparent": false,
"type": "gauge"
"type": "timeseries"
},
{
"datasource": "${DS_PROMETHEUS}",
Expand Down Expand Up @@ -3274,9 +3285,13 @@
"maxDataPoints": 100,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
"calcs": [
"min",
"mean",
"max"
],
"displayMode": "table",
"placement": "right"
},
"tooltip": {
"mode": "single"
Expand Down Expand Up @@ -3305,7 +3320,7 @@
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "Data write (bucket, success)",
"legendFormat": "Data write (bucket)",
"metric": "",
"refId": "",
"step": 10,
Expand All @@ -3319,7 +3334,7 @@
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "Data write (bucket, quota exceeded)",
"legendFormat": "Data write (bucket, exceeded)",
"metric": "",
"refId": "",
"step": 10,
Expand All @@ -3333,7 +3348,7 @@
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "Data write (account, success)",
"legendFormat": "Data write (account)",
"metric": "",
"refId": "",
"step": 10,
Expand All @@ -3347,7 +3362,7 @@
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "Data write (account, quota exceeded)",
"legendFormat": "Data write (account, exceeded)",
"metric": "",
"refId": "",
"step": 10,
Expand All @@ -3361,7 +3376,7 @@
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "Data write (account and bucket, success)",
"legendFormat": "Data write (account & bucket)",
"metric": "",
"refId": "",
"step": 10,
Expand All @@ -3375,7 +3390,7 @@
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "Data write (account and bucket, quota exceeded)",
"legendFormat": "Data write (account & bucket, exceeded)",
"metric": "",
"refId": "",
"step": 10,
Expand Down
34 changes: 17 additions & 17 deletions monitoring/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,27 +558,24 @@ def top10_errors_by_bucket(title, code):
)


quotaHealth = GaugePanel(
quotaHealth = TimeSeries(
title="Quota service uptime",
dataSource="${DS_PROMETHEUS}",
calc="mean",
format=UNITS.PERCENT_FORMAT,
min=0,
max=100,
noValue="-",
lineInterpolation="stepAfter",
fillOpacity=30,
unit="bool_on_off",
targets=[Target(
expr="\n".join([
'sum(rate(s3_cloudserver_utilization_service_state{namespace="${namespace}", state="healthy"}[$__rate_interval])) * 100', # noqa: E501
" /",
'sum(rate(s3_cloudserver_utilization_service_state{namespace="${namespace}", state=~"unhealthy|healthy"}[$__rate_interval]))', # noqa: E501
]),
legendFormat="Online rate",
legendFormat="State",
)],
thresholds=[
Threshold("#808080", 0, 0.0),
Threshold("red", 1, 0.0),
Threshold("orange", 2, 90.0),
Threshold("green", 3, 95.0),
Threshold("green", 0, 95.0),
Threshold("orange", 1, 90.0),
Threshold("red", 2, 0.0),
],
)

Expand Down Expand Up @@ -626,22 +623,25 @@ def top10_errors_by_bucket(title, code):
dataSource="${DS_PROMETHEUS}",
lineInterpolation="smooth",
spanNulls=3*60*1000,
legendDisplayMode="table",
legendPlacement="right",
legendValues=["min", "mean", "max"],
unit=UNITS.SECONDS,
targets=[
average_quota_latency_target(title="Overall"),
average_quota_latency_target(
title="Data write (bucket, success)", type='"bucket"', code='~"2.."'),
title="Data write (bucket)", type='"bucket"', code='~"2.."'),
average_quota_latency_target(
title="Data write (bucket, quota exceeded)", type='"bucket"', code='"429"'),
title="Data write (bucket, exceeded)", type='"bucket"', code='"429"'),
average_quota_latency_target(
title="Data write (account, success)", type='"account"', code='~"2.."'),
title="Data write (account)", type='"account"', code='~"2.."'),
average_quota_latency_target(
title="Data write (account, quota exceeded)", type='"account"', code='"429"'),
title="Data write (account, exceeded)", type='"account"', code='"429"'),
average_quota_latency_target(
title="Data write (account and bucket, success)", type='"bucketAccount"',
title="Data write (account & bucket)", type='"bucketAccount"',
code='~"2.."'),
average_quota_latency_target(
title="Data write (account and bucket, quota exceeded)", type='"bucketAccount"',
title="Data write (account & bucket, exceeded)", type='"bucketAccount"',
code='"429"'),
average_quota_latency_target(
title="Data deletion", type='"delete"'),
Expand Down

0 comments on commit 54d36b3

Please sign in to comment.