diff --git a/docker/compose.yaml b/docker/compose.yaml index 370ba79c..d0f30e87 100644 --- a/docker/compose.yaml +++ b/docker/compose.yaml @@ -1,19 +1,31 @@ -x-template: &ydb-common + +x-node: &ydb-common image: cr.yandex/crptqonuodf51kdj7a7d/ydb:24.2.7 restart: always hostname: localhost platform: linux/amd64 privileged: true + network_mode: host volumes: - - ./cfg/config.yaml:/opt/ydb/cfg/config.yaml + - ./configs/ydb/config.yaml:/opt/ydb/cfg/config.yaml + +x-deploy: &ydb-deploy + restart_policy: + condition: any + resources: + limits: + cpus: '1' + memory: 1000M + reservations: + cpus: '0.1' + memory: 250M name: ydb services: - static-0: <<: *ydb-common - container_name: static-0 + container_name: ydb-static-0 command: - /opt/ydb/bin/ydbd - server @@ -39,6 +51,8 @@ services: timeout: 1s retries: 3 start_period: 30s + deploy: + <<: *ydb-deploy static-init: <<: *ydb-common @@ -77,7 +91,7 @@ services: dynamic-1: <<: *ydb-common - container_name: dynamic-1 + container_name: ydb-dynamic-1 command: - /opt/ydb/bin/ydbd - server @@ -112,10 +126,12 @@ services: condition: service_completed_successfully tenant-init: condition: service_completed_successfully + deploy: + <<: *ydb-deploy dynamic-2: <<: *ydb-common - container_name: dynamic-2 + container_name: ydb-dynamic-2 command: - /opt/ydb/bin/ydbd - server @@ -150,10 +166,12 @@ services: condition: service_completed_successfully tenant-init: condition: service_completed_successfully + deploy: + <<: *ydb-deploy dynamic-3: <<: *ydb-common - container_name: dynamic-3 + container_name: ydb-dynamic-3 command: - /opt/ydb/bin/ydbd - server @@ -188,4 +206,63 @@ services: condition: service_completed_successfully tenant-init: condition: service_completed_successfully + deploy: + <<: *ydb-deploy + + prometheus: + image: prom/prometheus + restart: unless-stopped + ports: + - "9090:9090" + volumes: + - ./configs/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml + network_mode: host + deploy: &monitoring-deploy + resources: + limits: + cpus: '0.1' + memory: 1000M + reservations: + cpus: '0.001' + memory: 50M + + prometheus-pushgateway: + image: prom/pushgateway + restart: unless-stopped + ports: + - "9091:9091" + network_mode: host + deploy: + <<: *monitoring-deploy + + grafana: + image: grafana/grafana-oss + restart: unless-stopped + platform: linux/amd64 + ports: + - "10000:10000" + volumes: + - ./configs/grafana/provisioning:/etc/grafana/provisioning + environment: + - GF_SERVER_HTTP_PORT=10000 + - GF_AUTH_DISABLE_LOGIN_FORM=true + - GF_AUTH_ANONYMOUS_ENABLED=true + - GF_AUTH_ANONYMOUS_ORG_NAME=Main Org. + - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin + - GF_USERS_ALLOW_SIGN_UP=false + - GF_RENDERING_SERVER_URL=http://localhost:10001/render + - GF_RENDERING_CALLBACK_URL=http://localhost:10000/ + network_mode: host + deploy: + <<: *monitoring-deploy + + grafana-renderer: + image: grafana/grafana-image-renderer + ports: + - "10001:10001" + volumes: + - ./configs/grafana/renderer/config.json:/usr/src/app/config.json + network_mode: host + deploy: + <<: *monitoring-deploy diff --git a/docker/configs/grafana/provisioning/dashboards/dashboard.yml b/docker/configs/grafana/provisioning/dashboards/dashboard.yml new file mode 100644 index 00000000..c6784142 --- /dev/null +++ b/docker/configs/grafana/provisioning/dashboards/dashboard.yml @@ -0,0 +1,6 @@ +apiVersion: 1 + +providers: + - name: 'SLO' + options: + path: /etc/grafana/provisioning/dashboards diff --git a/docker/configs/grafana/provisioning/dashboards/slo.json b/docker/configs/grafana/provisioning/dashboards/slo.json new file mode 100644 index 00000000..69d76bf7 --- /dev/null +++ b/docker/configs/grafana/provisioning/dashboards/slo.json @@ -0,0 +1,646 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "rate(oks[$__rate_interval]) > 0", + "hide": false, + "legendFormat": "({{sdk}}-{{sdkVersion}}) {{jobName}} OK", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "rate(not_oks[$__rate_interval]) > 0", + "hide": false, + "legendFormat": "({{sdk}}-{{sdkVersion}}) {{jobName}} not OK", + "range": true, + "refId": "C" + } + ], + "title": "SLO Requests RPS", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 14, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "histogram_quantile(1, rate(attempts_bucket[$__rate_interval]))", + "hide": false, + "legendFormat": "{{sdk}}-{{sdkVersion}} {{jobName}}-{{status}}", + "range": true, + "refId": "A" + } + ], + "title": "Attempts", + "transformations": [], + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 7, + "panels": [], + "title": "Latencies", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "latency{jobName=\"read\", status=\"ok\"} > 0", + "legendFormat": "{{sdk}}-{{sdkVersion}}-p{{quantile}}", + "range": true, + "refId": "A" + } + ], + "title": "Read Latencies (OK)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "latency{jobName=\"write\", status=\"ok\"} > 0", + "legendFormat": "{{sdk}}-{{sdkVersion}}-p{{quantile}}", + "range": true, + "refId": "A" + } + ], + "title": "Write Latencies (OK)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "latency{jobName=\"read\", status=\"err\"} > 0", + "legendFormat": "{{sdk}}-{{sdkVersion}}-p{{quantile}}", + "range": true, + "refId": "A" + } + ], + "title": "Read Latencies (NOT OK)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "latency{jobName=\"write\", status=\"err\"} > 0", + "legendFormat": "{{sdk}}-{{sdkVersion}}-p{{quantile}}", + "range": true, + "refId": "A" + } + ], + "title": "Write Latencies (NOT OK)", + "type": "timeseries" + } + ], + "refresh": "", + "revision": 1, + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "filters": [], + "hide": 0, + "label": "", + "name": "filter", + "skipUrlSync": false, + "type": "adhoc" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "SLO", + "uid": "7CzMl5t4k", + "version": 1, + "weekStart": "" +} diff --git a/docker/configs/grafana/provisioning/datasources/datasource.yml b/docker/configs/grafana/provisioning/datasources/datasource.yml new file mode 100644 index 00000000..8fef07c1 --- /dev/null +++ b/docker/configs/grafana/provisioning/datasources/datasource.yml @@ -0,0 +1,11 @@ +apiVersion: 1 + +datasources: + - name: prometheus + type: prometheus + access: proxy + orgId: 1 + url: http://localhost:9090 + basicAuth: false + isDefault: true + editable: true diff --git a/docker/configs/grafana/renderer/config.json b/docker/configs/grafana/renderer/config.json new file mode 100644 index 00000000..279fc28f --- /dev/null +++ b/docker/configs/grafana/renderer/config.json @@ -0,0 +1,52 @@ +{ + "service": { + "host": null, + "port": 10001, + "protocol": "http", + "certFile": "", + "certKey": "", + + "metrics": { + "enabled": false, + "collectDefaultMetrics": true, + "requestDurationBuckets": [1, 5, 7, 9, 11, 13, 15, 20, 30] + }, + + "logging": { + "level": "info", + "console": { + "json": true, + "colorize": false + } + } + }, + "rendering": { + "chromeBin": null, + "args": ["--no-sandbox", "--disable-gpu"], + "ignoresHttpsErrors": false, + + "timezone": null, + "acceptLanguage": null, + "width": 1000, + "height": 500, + "deviceScaleFactor": 1, + "maxWidth": 3080, + "maxHeight": 3000, + "maxDeviceScaleFactor": 4, + "pageZoomLevel": 1, + "headed": false, + + "mode": "default", + "emulateNetworkConditions": false, + "clustering": { + "monitor": false, + "mode": "browser", + "maxConcurrency": 5, + "timeout": 30 + }, + + "verboseLogging": false, + "dumpio": false, + "timingMetrics": false + } +} diff --git a/docker/configs/prometheus/prometheus.yml b/docker/configs/prometheus/prometheus.yml new file mode 100644 index 00000000..87b40097 --- /dev/null +++ b/docker/configs/prometheus/prometheus.yml @@ -0,0 +1,8 @@ +global: + scrape_interval: 1s + evaluation_interval: 1s + +scrape_configs: + - job_name: 'pushgateway' + static_configs: + - targets: ['localhost:9091'] diff --git a/docker/cfg/config.yaml b/docker/configs/ydb/config.yaml similarity index 100% rename from docker/cfg/config.yaml rename to docker/configs/ydb/config.yaml diff --git a/docker/gen.js b/docker/gen.js index 4253a39d..1ab77ebd 100644 --- a/docker/gen.js +++ b/docker/gen.js @@ -13,7 +13,7 @@ let YDB_IC_PORT = 19001 let generateStaticNode = () => /** YAML */` static-0: <<: *ydb-common - container_name: static-0 + container_name: ydb-static-0 command: - /opt/ydb/bin/ydbd - server @@ -39,6 +39,8 @@ let generateStaticNode = () => /** YAML */` timeout: 1s retries: 3 start_period: 30s + deploy: + <<: *ydb-deploy static-init: <<: *ydb-common @@ -74,13 +76,13 @@ let generateStaticNode = () => /** YAML */` depends_on: static-init: condition: service_completed_successfully -` +`.slice(1) // Generate YDB Dynamic Node let generateDynamicNode = (idx) => /** YAML */` dynamic-${idx}: <<: *ydb-common - container_name: dynamic-${idx} + container_name: ydb-dynamic-${idx} command: - /opt/ydb/bin/ydbd - server @@ -115,21 +117,100 @@ let generateDynamicNode = (idx) => /** YAML */` condition: service_completed_successfully tenant-init: condition: service_completed_successfully -` + deploy: + <<: *ydb-deploy +`.slice(1) -let composeFile = `x-template: &ydb-common +// Generate Monitoring +let generateMonitoring = () => /** YAML */` + prometheus: + image: prom/prometheus + restart: unless-stopped + ports: + - "9090:9090" + volumes: + - ./configs/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml + network_mode: host + deploy: &monitoring-deploy + resources: + limits: + cpus: '0.1' + memory: 1000M + reservations: + cpus: '0.001' + memory: 50M + + prometheus-pushgateway: + image: prom/pushgateway + restart: unless-stopped + ports: + - "9091:9091" + network_mode: host + deploy: + <<: *monitoring-deploy + + grafana: + image: grafana/grafana-oss + restart: unless-stopped + platform: linux/amd64 + ports: + - "10000:10000" + volumes: + - ./configs/grafana/provisioning:/etc/grafana/provisioning + environment: + - GF_SERVER_HTTP_PORT=10000 + - GF_AUTH_DISABLE_LOGIN_FORM=true + - GF_AUTH_ANONYMOUS_ENABLED=true + - GF_AUTH_ANONYMOUS_ORG_NAME=Main Org. + - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin + - GF_USERS_ALLOW_SIGN_UP=false + - GF_RENDERING_SERVER_URL=http://localhost:10001/render + - GF_RENDERING_CALLBACK_URL=http://localhost:10000/ + network_mode: host + deploy: + <<: *monitoring-deploy + + grafana-renderer: + image: grafana/grafana-image-renderer + ports: + - "10001:10001" + volumes: + - ./configs/grafana/renderer/config.json:/usr/src/app/config.json + network_mode: host + deploy: + <<: *monitoring-deploy +`.slice(1) + +let composeFile = ` +x-node: &ydb-common image: cr.yandex/crptqonuodf51kdj7a7d/ydb:24.2.7 restart: always hostname: localhost platform: linux/amd64 privileged: true + network_mode: host volumes: - - ./cfg/config.yaml:/opt/ydb/cfg/config.yaml + - ./configs/ydb/config.yaml:/opt/ydb/cfg/config.yaml + +x-deploy: &ydb-deploy + restart_policy: + condition: any + resources: + limits: + cpus: '1' + memory: 1000M + reservations: + cpus: '0.1' + memory: 250M name: ydb services: -${generateStaticNode()}${generateDynamicNode(1)}${generateDynamicNode(2)}${generateDynamicNode(3)} +${generateStaticNode()} +${generateDynamicNode(1)} +${generateDynamicNode(2)} +${generateDynamicNode(3)} +${generateMonitoring()} `; fs.writeFileSync('compose.yaml', composeFile);