diff --git a/evals/benchmark/grafana/README.md b/evals/benchmark/grafana/README.md index 1484b7fe..8e7fe83a 100644 --- a/evals/benchmark/grafana/README.md +++ b/evals/benchmark/grafana/README.md @@ -64,6 +64,12 @@ If you have any Grafana installation issue please check this [link](https://gra The next step is to configure the data source for Grafana to scrape metrics from. Click on the "Data Source" button, select Prometheus, and specify the Prometheus url `localhost:9090`. -Then you need to upload a dashboard JSON file in the Grafana UI under `Home > Dashboards > Import dashboard`. You can use a file like [tgi_grafana.json](https://github.com/huggingface/text-generation-inference/blob/main/assets/tgi_grafana.json). - +## 3. Import Grafana Dashboard +After setting up the Grafana server, you can import a Grafana dashboard by uploading a dashboard JSON file in the Grafana UI under `Home > Dashboards > Import dashboard`. You can use a file like [tgi_grafana.json](https://github.com/huggingface/text-generation-inference/blob/main/assets/tgi_grafana.json). Open the dashboard, and you will see different panels displaying the metrics data. + +In this folder, we also provide some Grafana dashboard JSON files for your reference. +- `chatqna_megaservice_grafana.json`: A sample Grafana dashboard JSON file for visualizing the metrics of ChatQnA microservices. Selecting different job_name options in the top-left of the dashboard displays the metrics for the corresponding microservices. +- `tei_grafana.json`: A sample Grafana dashboard JSON file for visualizing TEI metrics. +- `tgi_grafana.json`: A sample Grafana dashboard JSON file for visualizing TGI metrics. +- `redis_grafana.json`: A sample Grafana dashboard JSON file for visualizing the Redis metrics. To import the Redis metrics, you need to add a new connection and a Redis data source in Grafana. Please refer to this [link](https://grafana.com/grafana/plugins/redis-datasource/?tab=installation) for more details. 
diff --git a/evals/benchmark/grafana/chatqna_megaservice_grafana.json b/evals/benchmark/grafana/chatqna_megaservice_grafana.json index bed8be34..3705d029 100644 --- a/evals/benchmark/grafana/chatqna_megaservice_grafana.json +++ b/evals/benchmark/grafana/chatqna_megaservice_grafana.json @@ -1,4 +1,47 @@ { + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus Data Source", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "panel", + "id": "gauge", + "name": "Gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.0.2" + }, + { + "type": "panel", + "id": "heatmap", + "name": "Heatmap", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], "annotations": { "list": [ { @@ -18,14 +61,14 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 30, + "id": 1, "links": [], "liveNow": false, "panels": [ { "datasource": { "type": "prometheus", - "uid": "P1809F7CD0C75ACF3" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -111,17 +154,17 @@ "$$hashKey": "object:638", "datasource": { "type": "prometheus", - "uid": "e4584a9f-5364-4b3d-a851-7abbc5250820" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "rate(process_cpu_seconds_total{container=\"$container_name\"}[1m])", + "expr": "rate(process_cpu_seconds_total{job=\"$job_name\"}[1m])", "format": "time_series", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "", "intervalFactor": 1, - "legendFormat": "cpu", + "legendFormat": "{{ job }}", "range": true, "refId": "A", "useBackend": false @@ -133,7 +176,7 @@ { "datasource": { "type": "prometheus", - "uid": "P1809F7CD0C75ACF3" + "uid": 
"${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -218,17 +261,17 @@ "$$hashKey": "object:638", "datasource": { "type": "prometheus", - "uid": "e4584a9f-5364-4b3d-a851-7abbc5250820" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "process_resident_memory_bytes{container=\"$container_name\"}", + "expr": "process_resident_memory_bytes{job=\"$job_name\"}", "format": "time_series", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "", "intervalFactor": 1, - "legendFormat": "mem", + "legendFormat": "{{ job }}", "range": true, "refId": "A", "useBackend": false @@ -240,7 +283,7 @@ { "datasource": { "type": "prometheus", - "uid": "P1809F7CD0C75ACF3" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -327,11 +370,11 @@ "$$hashKey": "object:214", "datasource": { "type": "prometheus", - "uid": "e4584a9f-5364-4b3d-a851-7abbc5250820" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", - "expr": "sum by(handler) (rate(http_requests_total{container=\"$container_name\", handler!~\"/metrics|/v1/health_check|none\"}[1m]))", + "expr": "sum by(handler) (rate(http_requests_total{job=\"$job_name\", handler!~\"/metrics|/v1/health_check|none\"}[1m]))", "format": "time_series", "fullMetaSearch": false, "includeNullMetadata": true, @@ -349,7 +392,7 @@ { "datasource": { "type": "prometheus", - "uid": "P1809F7CD0C75ACF3" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -467,11 +510,11 @@ "$$hashKey": "object:140", "datasource": { "type": "prometheus", - "uid": "e4584a9f-5364-4b3d-a851-7abbc5250820" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum by(status) (rate(http_requests_total{container=\"$container_name\"}[1m]))", + "expr": "sum by(status) (rate(http_requests_total{job=\"$job_name\"}[1m]))", "format": "time_series", "fullMetaSearch": false, "includeNullMetadata": true, @@ -489,7 +532,7 @@ { "datasource": { "type": 
"prometheus", - "uid": "P1809F7CD0C75ACF3" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -592,11 +635,11 @@ "$$hashKey": "object:766", "datasource": { "type": "prometheus", - "uid": "e4584a9f-5364-4b3d-a851-7abbc5250820" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(rate(http_requests_total{status=\"5xx\", container=\"$container_name\"}[30s]))", + "expr": "sum(rate(http_requests_total{status=\"5xx\", job=\"$job_name\"}[30s]))", "format": "time_series", "fullMetaSearch": false, "includeNullMetadata": true, @@ -614,7 +657,7 @@ { "datasource": { "type": "prometheus", - "uid": "P1809F7CD0C75ACF3" + "uid": "${DS_PROMETHEUS}" }, "description": "", "fieldConfig": { @@ -704,11 +747,11 @@ "$$hashKey": "object:426", "datasource": { "type": "prometheus", - "uid": "e4584a9f-5364-4b3d-a851-7abbc5250820" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "code", - "expr": "histogram_quantile(0.$Percentage, rate(http_request_duration_seconds_bucket{container=\"$container_name\", handler!~\"/metrics|/v1/health_check|none\"}[1m])) ", + "expr": "histogram_quantile(0.$Percentage, rate(http_request_duration_seconds_bucket{job=\"$job_name\", handler!~\"/metrics|/v1/health_check|none\"}[1m])) ", "format": "time_series", "fullMetaSearch": false, "includeNullMetadata": true, @@ -726,7 +769,7 @@ { "datasource": { "type": "prometheus", - "uid": "P1809F7CD0C75ACF3" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -814,10 +857,10 @@ "$$hashKey": "object:146", "datasource": { "type": "prometheus", - "uid": "e4584a9f-5364-4b3d-a851-7abbc5250820" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "http_request_duration_seconds_sum{handler!=\"none\",container=\"$container_name\", handler!~\"/metrics|/v1/health_check|none\"} / http_request_duration_seconds_count", + "expr": "http_request_duration_seconds_sum{handler!=\"none\",job=\"$job_name\", 
handler!~\"/metrics|/v1/health_check|none\"} / http_request_duration_seconds_count", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -843,17 +886,17 @@ }, "datasource": { "type": "prometheus", - "uid": "P1809F7CD0C75ACF3" + "uid": "${DS_PROMETHEUS}" }, - "definition": "label_values(http_requests_total{namespace=\"opea-rag\"},container)", + "definition": "label_values(http_requests_total{},job)", "hide": 0, "includeAll": false, "multi": false, - "name": "container_name", + "name": "job_name", "options": [], "query": { "qryType": 1, - "query": "label_values(http_requests_total{namespace=\"opea-rag\"},container)", + "query": "label_values(http_requests_total{},job)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -925,6 +968,6 @@ "timezone": "", "title": "ChatQnA MegaService Dashboard", "uid": "_eX4mpl0", - "version": 12, + "version": 1, "weekStart": "" } diff --git a/evals/benchmark/grafana/tei_grafana.json b/evals/benchmark/grafana/tei_grafana.json index bb9763e6..8dcd7a5d 100644 --- a/evals/benchmark/grafana/tei_grafana.json +++ b/evals/benchmark/grafana/tei_grafana.json @@ -1,4 +1,47 @@ { + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus Data Source", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "panel", + "id": "gauge", + "name": "Gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.0.2" + }, + { + "type": "panel", + "id": "heatmap", + "name": "Heatmap", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], "annotations": { "list": [ { @@ -26,7 +69,7 @@ "fiscalYearStartMonth": 0, "gnetId": 20246, "graphTooltip": 0, - "id": 11, + "id": 1, "links": [], "liveNow": false, 
"panels": [ @@ -46,7 +89,7 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -121,6 +164,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -129,10 +173,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(increase(te_embed_success{namespace=\"$namespace\", service=\"$service\"}[1m]))", + "expr": "sum(increase(te_embed_success{namespace=\"\", job=\"$job\"}[1m]))", "legendFormat": "Success", "range": true, "refId": "A" @@ -140,10 +184,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(increase(te_embed_count{namespace=\"$namespace\", service=\"$service\"}[1m]))", + "expr": "sum(increase(te_embed_count{namespace=\"\", job=\"$job\"}[1m]))", "hide": false, "legendFormat": "Total Count", "range": true, @@ -156,7 +200,7 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -277,6 +321,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -285,10 +330,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(te_embed_queue_duration_bucket{namespace=\"$namespace\", service=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(te_embed_queue_duration_bucket{namespace=\"\", job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -296,10 +341,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(te_embed_queue_duration_bucket{container=\"$service\"}[10m])))", + "expr": 
"histogram_quantile(0.9, sum by (le) (rate(te_embed_queue_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -308,10 +353,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(te_embed_queue_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, sum by (le) (rate(te_embed_queue_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -334,7 +379,7 @@ "dataFormat": "tsbuckets", "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -394,6 +439,7 @@ }, "showValue": "never", "tooltip": { + "maxHeight": 600, "mode": "single", "showColorScale": false, "yHistogram": false @@ -405,17 +451,17 @@ "unit": "s" } }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "reverseYBuckets": false, "targets": [ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "exemplar": true, - "expr": "sum(increase(te_request_duration_bucket{namespace=\"$namespace\", service=\"$service\"}[5m])) by (le)", + "expr": "sum(increase(te_request_duration_bucket{namespace=\"\", job=\"$job\"}[5m])) by (le)", "format": "heatmap", "interval": "", "legendFormat": "{{ le }}", @@ -443,7 +489,7 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -518,6 +564,7 @@ "showLegend": false }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -527,10 +574,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "count(te_request_count{namespace=\"$namespace\", service=\"$service\"})", + "expr": "count(te_request_count{namespace=\"\", job=\"$job\"})", 
"legendFormat": "Replicas", "range": true, "refId": "A" @@ -542,7 +589,7 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -589,15 +636,15 @@ "showThresholdMarkers": true, "sizing": "auto" }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(te_queue_size{namespace=\"$namespace\", service=\"$service\"})", + "expr": "sum(te_queue_size{namespace=\"\", job=\"$job\"})", "legendFormat": "__auto", "range": true, "refId": "A" @@ -609,7 +656,7 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -730,6 +777,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -738,10 +786,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(te_embed_duration_bucket{namespace=\"$namespace\", service=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(te_embed_duration_bucket{namespace=\"\", job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -749,10 +797,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(te_embed_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, sum by (le) (rate(te_embed_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -761,10 +809,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) 
(rate(te_embed_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, sum by (le) (rate(te_embed_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -777,7 +825,7 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -898,6 +946,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -906,10 +955,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(te_request_tokenization_duration_bucket{namespace=\"$namespace\", service=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(te_request_tokenization_duration_bucket{namespace=\"\", job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -917,10 +966,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(te_request_tokenization_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, sum by (le) (rate(te_request_tokenization_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -929,10 +978,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(te_request_tokenization_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, sum by (le) (rate(te_request_tokenization_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -945,7 +994,7 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -1066,6 +1115,7 
@@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -1074,10 +1124,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(te_request_inference_duration_bucket{namespace=\"$namespace\", service=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(te_request_inference_duration_bucket{namespace=\"\", job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -1085,10 +1135,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(te_request_inference_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, sum by (le) (rate(te_request_inference_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -1097,10 +1147,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(te_request_inference_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, sum by (le) (rate(te_request_inference_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -1113,7 +1163,7 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -1234,6 +1284,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -1242,10 +1293,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(te_embed_tokenization_duration_bucket{namespace=\"$namespace\", service=\"$service\"}[10m])))", + "expr": 
"histogram_quantile(0.5, sum by (le) (rate(te_embed_tokenization_duration_bucket{namespace=\"\", job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -1253,10 +1304,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(te_embed_tokenization_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, sum by (le) (rate(te_embed_tokenization_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -1265,10 +1316,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(te_embed_tokenization_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, sum by (le) (rate(te_embed_tokenization_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -1294,7 +1345,7 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -1338,8 +1389,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1366,6 +1416,7 @@ "showLegend": false }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -1375,10 +1426,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "avg(te_batch_next_size_bucket{namespace=\"$namespace\", service=\"$service\"})", + "expr": "avg(te_batch_next_size_bucket{namespace=\"\", job=\"$job\"})", "legendFormat": "{{ pod }}", "range": true, "refId": "A" @@ -1390,7 +1441,7 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -1434,8 +1485,7 @@ "mode": "absolute", 
"steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1511,6 +1561,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -1519,10 +1570,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(te_request_queue_duration_bucket{namespace=\"$namespace\", service=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(te_request_queue_duration_bucket{namespace=\"\", job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -1530,10 +1581,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(te_request_queue_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, sum by (le) (rate(te_request_queue_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -1542,10 +1593,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(te_request_queue_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, sum by (le) (rate(te_request_queue_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -1558,7 +1609,7 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -1602,8 +1653,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1630,6 +1680,7 @@ "showLegend": false }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -1639,10 +1690,10 @@ { "datasource": { "type": "prometheus", - "uid": 
"ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "avg(te_request_input_length_bucket{namespace=\"$namespace\", service=\"$service\"})", + "expr": "avg(te_request_input_length_bucket{namespace=\"\", job=\"$job\"})", "legendFormat": "{{ pod }}", "range": true, "refId": "A" @@ -1667,7 +1718,7 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -1711,8 +1762,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1788,6 +1838,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -1796,10 +1847,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(te_embed_inference_duration_bucket{method=\"prefill\", namespace=\"$namespace\", service=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(te_embed_inference_duration_bucket{method=\"\", namespace=\"\", job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -1807,10 +1858,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(te_embed_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, sum by (le) (rate(te_embed_inference_duration_bucket{method=\"\", namespace=\"\", job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -1819,10 +1870,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(te_embed_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, 
sum by (le) (rate(te_embed_inference_duration_bucket{method=\"\", namespace=\"\", job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -1845,7 +1896,7 @@ "dataFormat": "tsbuckets", "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -1905,6 +1956,7 @@ }, "showValue": "never", "tooltip": { + "maxHeight": 600, "mode": "single", "showColorScale": false, "yHistogram": false @@ -1916,20 +1968,20 @@ "unit": "s" } }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.0.0", "reverseYBuckets": false, "targets": [ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "exemplar": true, - "expr": "sum(increase(te_embed_inference_duration_bucket{method=\"prefill\", namespace=\"$namespace\", service=\"$service\"}[5m])) by (le)", + "expr": "sum(increase(te_embed_inference_duration_bucket{method=\"\", namespace=\"\", job=\"$job\"}[5m])) by (le)", "format": "heatmap", "interval": "", - "legendFormat": "{{ le }}", + "legendFormat": "{{ le }}", "range": true, "refId": "A" } @@ -1967,7 +2019,7 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -2011,8 +2063,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2088,6 +2139,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -2096,10 +2148,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(te_embed_inference_duration_bucket{namespace=\"$namespace\", service=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(te_embed_inference_duration_bucket{namespace=\"\", job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": 
"A" @@ -2107,10 +2159,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(te_embed_inference_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, sum by (le) (rate(te_embed_inference_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -2119,10 +2171,10 @@ { "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(te_embed_inference_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, sum by (le) (rate(te_embed_inference_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -2153,17 +2205,17 @@ "list": [ { "current": { - "selected": false, + "selected": true, "text": "All", "value": "$__all" }, "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, "definition": "label_values(te_request_count,namespace)", "hide": 0, - "includeAll": true, + "includeAll": false, "multi": false, "name": "namespace", "options": [], @@ -2179,22 +2231,22 @@ }, { "current": { - "selected": false, - "text": "All", - "value": "$__all" + "selected": true, + "text": "chatqna-tei", + "value": "chatqna-tei" }, "datasource": { "type": "prometheus", - "uid": "ads1wmjwuc7pce" + "uid": "${DS_PROMETHEUS}" }, - "definition": "label_values(te_request_count{namespace=\"$namespace\"},service)", + "definition": "label_values(te_request_count{namespace=\"\"},job)", "hide": 0, - "includeAll": true, + "includeAll": false, "multi": false, - "name": "service", + "name": "job", "options": [], "query": { - "query": "label_values(te_request_count{namespace=\"$namespace\"},service)", + "query": "label_values(te_request_count{namespace=\"\"},job)", "refId": 
"PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -2209,12 +2261,13 @@ "from": "now-1h", "to": "now-1m" }, + "timeRangeUpdatedDuringEditOrView": false, "timepicker": { "nowDelay": "1m" }, "timezone": "", "title": "Text Embedding Inference", "uid": "RHSk7EL4kdqbc", - "version": 8, + "version": 1, "weekStart": "" } diff --git a/evals/benchmark/grafana/tgi_grafana.json b/evals/benchmark/grafana/tgi_grafana.json index a79b7595..f1602c78 100644 --- a/evals/benchmark/grafana/tgi_grafana.json +++ b/evals/benchmark/grafana/tgi_grafana.json @@ -67,7 +67,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 2, - "id": 551, + "id": 1, "links": [], "liveNow": false, "panels": [ @@ -124,7 +124,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "10.4.2", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -132,7 +132,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "(histogram_quantile(0.5, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m]))) * 1000) > 0", + "expr": "(histogram_quantile(0.5, sum by (le) (rate(tgi_request_queue_duration_bucket{job=\"$job\"}[10m]))) * 1000) > 0", "hide": true, "instant": false, "legendFormat": "__auto", @@ -145,7 +145,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "(histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m]))) * 1000) > 0", + "expr": "(histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", job=\"$job\"}[10m]))) * 1000) > 0", "hide": true, "instant": false, "legendFormat": "__auto", @@ -219,7 +219,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "10.4.2", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -227,7 +227,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "(histogram_quantile(0.5, 
sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m]))) * 1000)>0", + "expr": "(histogram_quantile(0.5, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", job=\"$job\"}[10m]))) * 1000)>0", "instant": false, "range": true, "refId": "A" @@ -284,7 +284,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "10.4.2", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -292,7 +292,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "sum((rate(tgi_request_generated_tokens_sum{container=\"$service\"}[10m]) / rate(tgi_request_generated_tokens_count{container=\"$service\"}[10m]))>0)", + "expr": "sum((rate(tgi_request_generated_tokens_sum{job=\"$job\"}[10m]) / rate(tgi_request_generated_tokens_count{job=\"$job\"}[10m]))>0)", "instant": false, "range": true, "refId": "A" @@ -425,6 +425,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -436,7 +437,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_input_length_bucket{job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -447,7 +448,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_input_length_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -459,7 +460,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", + "expr": 
"histogram_quantile(0.99, sum by (le) (rate(tgi_request_input_length_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -593,6 +594,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -604,7 +606,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_generated_tokens_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_generated_tokens_bucket{job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -615,7 +617,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_generated_tokens_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_generated_tokens_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -627,7 +629,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_generated_tokens_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_generated_tokens_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -728,6 +730,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -739,7 +742,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "sum(increase(tgi_request_success{container=\"$service\"}[1m]))", + "expr": "sum(increase(tgi_request_success{job=\"$job\"}[1m]))", "legendFormat": "Success", "range": true, "refId": "A" @@ -750,7 +753,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "sum(increase(tgi_request_failure{container=\"$service\"}[1m])) by (err)", + 
"expr": "sum(increase(tgi_request_failure{job=\"$job\"}[1m])) by (err)", "hide": false, "legendFormat": "Error: {{err}}", "range": true, @@ -884,6 +887,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -895,7 +899,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_mean_time_per_token_duration_bucket{job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -906,7 +910,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_mean_time_per_token_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -918,7 +922,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_mean_time_per_token_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -1001,6 +1005,7 @@ }, "showValue": "never", "tooltip": { + "maxHeight": 600, "mode": "single", "showColorScale": false, "yHistogram": false @@ -1012,7 +1017,7 @@ "unit": "s" } }, - "pluginVersion": "10.4.2", + "pluginVersion": "11.0.0", "reverseYBuckets": false, "targets": [ { @@ -1022,7 +1027,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(increase(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[5m])) by (le)", + "expr": 
"sum(increase(tgi_request_mean_time_per_token_duration_bucket{job=\"$job\"}[5m])) by (le)", "format": "heatmap", "interval": "", "legendFormat": "{{ le }}", @@ -1125,6 +1130,7 @@ "showLegend": false }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -1137,7 +1143,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "count(tgi_request_count{container=\"$service\"})", + "expr": "count(tgi_request_count{job=\"$job\"})", "legendFormat": "Replicas", "range": true, "refId": "A" @@ -1196,7 +1202,7 @@ "showThresholdMarkers": true, "sizing": "auto" }, - "pluginVersion": "10.4.2", + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -1204,7 +1210,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "sum(tgi_queue_size{container=\"$service\"})", + "expr": "sum(tgi_queue_size{job=\"$job\"})", "legendFormat": "__auto", "range": true, "refId": "A" @@ -1301,6 +1307,7 @@ "showLegend": false }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -1313,7 +1320,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "avg(tgi_batch_current_max_tokens{container=\"$service\"})", + "expr": "avg(tgi_batch_current_max_tokens{job=\"$job\"})", "legendFormat": "{{ pod }}", "range": true, "refId": "A" @@ -1446,6 +1453,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -1457,7 +1465,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_skipped_tokens_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_skipped_tokens_bucket{job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -1468,7 +1476,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) 
(rate(tgi_request_skipped_tokens_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_skipped_tokens_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -1480,7 +1488,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_skipped_tokens_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_skipped_tokens_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -1614,6 +1622,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -1625,7 +1634,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_input_length_bucket{job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -1636,7 +1645,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_input_length_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -1648,7 +1657,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_input_length_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -1782,6 +1791,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -1793,7 +1803,7 @@ 
"uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_duration_bucket{job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -1804,7 +1814,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -1816,7 +1826,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -1901,6 +1911,7 @@ "showLegend": false }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -1913,7 +1924,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "avg(tgi_batch_current_size{container=\"$service\"})", + "expr": "avg(tgi_batch_current_size{job=\"$job\"})", "legendFormat": "{{ pod }}", "range": true, "refId": "A" @@ -2000,6 +2011,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "single", "sort": "none" } @@ -2011,7 +2023,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "sum(increase(tgi_batch_concat{container=\"$service\"}[1m])) by (reason)", + "expr": "sum(increase(tgi_batch_concat{job=\"$job\"}[1m])) by (reason)", "hide": false, "legendFormat": "Reason: {{ reason }}", "range": true, @@ -2145,6 +2157,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": 
"single", "sort": "none" } @@ -2156,7 +2169,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_queue_duration_bucket{job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -2167,7 +2180,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_queue_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -2179,7 +2192,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_queue_duration_bucket{job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -2249,8 +2262,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2337,7 +2349,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -2348,7 +2360,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, 
sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -2360,7 +2372,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -2464,7 +2476,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(increase(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[5m])) by (le)", + "expr": "sum(increase(tgi_batch_inference_duration_bucket{method=\"prefill\", job=\"$job\"}[5m])) by (le)", "format": "heatmap", "interval": "", "legendFormat": "{{ le }}", @@ -2549,8 +2561,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2637,7 +2648,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"decode\", job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -2648,7 +2659,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"decode\", job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -2660,7 +2671,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, 
"editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"decode\", job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -2764,7 +2775,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(increase(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", + "expr": "sum(increase(tgi_batch_inference_duration_bucket{method=\"decode\", job=\"$job\"}[5m])) by (le)", "format": "heatmap", "interval": "", "legendFormat": "{{ le }}", @@ -2849,8 +2860,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2937,7 +2947,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -2948,7 +2958,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -2960,7 +2970,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, sum by (le) 
(rate(tgi_batch_forward_duration_bucket{method=\"decode\", job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -3064,7 +3074,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(increase(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", + "expr": "sum(increase(tgi_batch_forward_duration_bucket{method=\"decode\", job=\"$job\"}[5m])) by (le)", "format": "heatmap", "interval": "", "legendFormat": "{{ le }}", @@ -3136,8 +3146,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3224,7 +3233,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_decode_duration_bucket{method=\"decode\", job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -3235,7 +3244,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_decode_duration_bucket{method=\"decode\", job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -3247,7 +3256,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_decode_duration_bucket{method=\"decode\", job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -3351,7 +3360,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": 
"sum(increase(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", + "expr": "sum(increase(tgi_batch_decode_duration_bucket{method=\"decode\", job=\"$job\"}[5m])) by (le)", "format": "heatmap", "interval": "", "legendFormat": "{{ le }}", @@ -3423,8 +3432,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3511,7 +3519,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_filter_duration_bucket{method=\"decode\", job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -3522,7 +3530,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_filter_duration_bucket{method=\"decode\", job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -3534,7 +3542,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_filter_duration_bucket{method=\"decode\", job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -3638,7 +3646,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(increase(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", + "expr": "sum(increase(tgi_batch_filter_duration_bucket{method=\"decode\", job=\"$job\"}[5m])) by (le)", "format": "heatmap", "interval": "", "legendFormat": 
"{{ le }}", @@ -3710,8 +3718,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3798,7 +3805,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_concat_duration_bucket{method=\"decode\", job=\"$job\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" @@ -3809,7 +3816,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_concat_duration_bucket{method=\"decode\", job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, @@ -3821,7 +3828,7 @@ "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_concat_duration_bucket{method=\"decode\", job=\"$job\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, @@ -3925,7 +3932,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(increase(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", + "expr": "sum(increase(tgi_batch_concat_duration_bucket{method=\"decode\", job=\"$job\"}[5m])) by (le)", "format": "heatmap", "interval": "", "legendFormat": "{{ le }}", @@ -3958,22 +3965,23 @@ "list": [ { "current": { - "selected": false, - "text": "gpu-txt-gen-cohereforai-c4ai-command-r-plu-ba7f1", - "value": "gpu-txt-gen-cohereforai-c4ai-command-r-plu-ba7f1" + "isNone": true, + "selected": true, + "text": "None", + 
"value": "" }, "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, - "definition": "label_values(tgi_request_count, container)", + "definition": "label_values(tgi_request_count, job)", "hide": 0, "includeAll": false, "multi": false, - "name": "service", + "name": "job", "options": [], "query": { - "query": "label_values(tgi_request_count, container)", + "query": "label_values(tgi_request_count, job)", "refId": "StandardVariableQuery" }, "refresh": 1, @@ -3988,12 +3996,13 @@ "from": "now-30m", "to": "now-30s" }, + "timeRangeUpdatedDuringEditOrView": false, "timepicker": { "nowDelay": "30s" }, "timezone": "", "title": "Text Generation Inference", "uid": "RHSk7EL4kdqsd", - "version": 12, + "version": 1, "weekStart": "" - } +}