monitoring: add Grafana dashboards + kube-state-metrics & node-exporter
Dashboards (provisioned via ConfigMaps into Grafana pod, 'K3s Cluster' folder): - Cluster Overview: per-namespace CPU/mem/net/fs, pod counts, pod health (KSM) - Pods & Services: per-pod CPU/mem/net/fs, throttling, pod status, restarts, PVCs - Nodes: per-node CPU%/mem%, load average, disk usage, network (node-exporter) - Control Plane & API Server: request rate, latency p95, 5xx, kubelet/PLEG - Prometheus Self-Monitoring: ingestion, series, scrape duration, memory Exporters (auto-scraped via existing kubernetes-service-endpoints job): - kube-state-metrics: pod/deployment/PVC/replica state (kube_pod_status_phase, kube_pod_container_status_restarts_total, kube_persistentvolumeclaim_*) - node-exporter (DaemonSet, hostNetwork): node_cpu_seconds_total, node_memory_*, node_filesystem_*, node_load*, node_network_*
This commit is contained in:
218
monitoring/grafana-dashboard-prometheus.yaml
Normal file
218
monitoring/grafana-dashboard-prometheus.yaml
Normal file
@@ -0,0 +1,218 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: grafana-dashboard-prometheus
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: grafana
|
||||
grafana_dashboard: "1"
|
||||
data:
|
||||
prometheus.json: |
|
||||
{
|
||||
"annotations": {"list": []},
|
||||
"editable": true,
|
||||
"graphTooltip": 1,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
  "id": 1,
  "title": "Prometheus Status",
  "type": "stat",
  "pluginVersion": "10.2.3",
  "datasource": {"type": "prometheus", "uid": "Prometheus"},
  "gridPos": {"x": 0, "y": 0, "w": 6, "h": 5},
  "targets": [
    {
      "refId": "A",
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "up{job=\"prometheus\"}"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "color": {"mode": "thresholds"},
      "mappings": [
        {
          "type": "value",
          "options": {
            "0": {"text": "DOWN", "color": "red"},
            "1": {"text": "UP", "color": "green"}
          }
        }
      ],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {"color": "red", "value": null},
          {"color": "green", "value": 1}
        ]
      }
    },
    "overrides": []
  },
  "options": {
    "colorMode": "background",
    "graphMode": "none",
    "justifyMode": "center",
    "orientation": "horizontal",
    "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
    "textMode": "value"
  }
},
|
||||
{
  "id": 2,
  "title": "Prometheus RSS Memory",
  "type": "stat",
  "pluginVersion": "10.2.3",
  "datasource": {"type": "prometheus", "uid": "Prometheus"},
  "gridPos": {"x": 6, "y": 0, "w": 6, "h": 5},
  "targets": [
    {
      "refId": "A",
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "process_resident_memory_bytes{job=\"prometheus\"}"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "bytes",
      "color": {"mode": "thresholds"},
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {"color": "green", "value": null}
        ]
      }
    },
    "overrides": []
  },
  "options": {
    "colorMode": "value",
    "graphMode": "area",
    "justifyMode": "auto",
    "orientation": "auto",
    "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
    "textMode": "auto"
  }
},
|
||||
{
  "id": 3,
  "title": "Active Series",
  "type": "stat",
  "pluginVersion": "10.2.3",
  "datasource": {"type": "prometheus", "uid": "Prometheus"},
  "gridPos": {"x": 12, "y": 0, "w": 6, "h": 5},
  "targets": [
    {
      "refId": "A",
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "prometheus_tsdb_head_series"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "short",
      "color": {"mode": "thresholds"},
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {"color": "green", "value": null}
        ]
      }
    },
    "overrides": []
  },
  "options": {
    "colorMode": "value",
    "graphMode": "area",
    "justifyMode": "auto",
    "orientation": "auto",
    "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
    "textMode": "auto"
  }
},
|
||||
{
  "id": 4,
  "title": "Scrape Targets",
  "type": "stat",
  "pluginVersion": "10.2.3",
  "datasource": {"type": "prometheus", "uid": "Prometheus"},
  "gridPos": {"x": 18, "y": 0, "w": 6, "h": 5},
  "targets": [
    {
      "refId": "A",
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "count(up)"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "short",
      "color": {"mode": "thresholds"},
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {"color": "green", "value": null}
        ]
      }
    },
    "overrides": []
  },
  "options": {
    "colorMode": "value",
    "graphMode": "area",
    "justifyMode": "auto",
    "orientation": "auto",
    "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
    "textMode": "auto"
  }
},
|
||||
{
  "id": 10,
  "title": "Prometheus Memory",
  "description": "Resident set size of the Prometheus process compared with the Go heap currently in use. Both series are gauges in bytes.",
  "type": "timeseries",
  "datasource": {"type": "prometheus", "uid": "Prometheus"},
  "gridPos": {"h": 9, "w": 12, "x": 0, "y": 5},
  "targets": [
    {
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "process_resident_memory_bytes{job=\"prometheus\"}",
      "legendFormat": "RSS",
      "refId": "A"
    },
    {
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "go_memstats_heap_inuse_bytes{job=\"prometheus\"}",
      "legendFormat": "Go heap in use",
      "refId": "B"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "color": {"mode": "palette-classic"},
      "custom": {
        "drawStyle": "line",
        "fillOpacity": 10,
        "lineInterpolation": "linear",
        "lineWidth": 1,
        "showPoints": "never",
        "spanNulls": true,
        "stacking": {"group": "A", "mode": "none"},
        "thresholdsStyle": {"mode": "off"}
      },
      "mappings": [],
      "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
      "unit": "bytes"
    },
    "overrides": []
  },
  "options": {
    "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
    "tooltip": {"mode": "multi", "sort": "desc"}
  }
},
|
||||
{
  "id": 11,
  "title": "Prometheus CPU",
  "type": "timeseries",
  "datasource": {"type": "prometheus", "uid": "Prometheus"},
  "gridPos": {"x": 12, "y": 5, "w": 12, "h": 9},
  "targets": [
    {
      "refId": "A",
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "rate(process_cpu_seconds_total{job=\"prometheus\"}[5m])",
      "legendFormat": "prometheus"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "core",
      "color": {"mode": "palette-classic"},
      "custom": {
        "drawStyle": "line",
        "lineInterpolation": "linear",
        "lineWidth": 1,
        "fillOpacity": 10,
        "showPoints": "never",
        "spanNulls": true,
        "stacking": {"group": "A", "mode": "none"},
        "thresholdsStyle": {"mode": "off"}
      },
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {"color": "green", "value": null}
        ]
      }
    },
    "overrides": []
  },
  "options": {
    "legend": {
      "showLegend": true,
      "displayMode": "table",
      "placement": "right",
      "calcs": ["lastNotNull"]
    },
    "tooltip": {"mode": "multi", "sort": "desc"}
  }
},
|
||||
{
  "id": 12,
  "title": "Ingestion Rate (samples/s)",
  "type": "timeseries",
  "datasource": {"type": "prometheus", "uid": "Prometheus"},
  "gridPos": {"x": 0, "y": 14, "w": 12, "h": 9},
  "targets": [
    {
      "refId": "A",
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "rate(prometheus_tsdb_head_samples_appended_total[5m])",
      "legendFormat": "samples/s"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "short",
      "color": {"mode": "palette-classic"},
      "custom": {
        "drawStyle": "line",
        "lineInterpolation": "linear",
        "lineWidth": 1,
        "fillOpacity": 10,
        "showPoints": "never",
        "spanNulls": true,
        "stacking": {"group": "A", "mode": "none"},
        "thresholdsStyle": {"mode": "off"}
      },
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {"color": "green", "value": null}
        ]
      }
    },
    "overrides": []
  },
  "options": {
    "legend": {
      "showLegend": true,
      "displayMode": "table",
      "placement": "right",
      "calcs": ["lastNotNull"]
    },
    "tooltip": {"mode": "multi", "sort": "desc"}
  }
},
|
||||
{
  "id": 13,
  "title": "Scrape Duration by Job",
  "type": "timeseries",
  "datasource": {"type": "prometheus", "uid": "Prometheus"},
  "gridPos": {"x": 12, "y": 14, "w": 12, "h": 9},
  "targets": [
    {
      "refId": "A",
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "scrape_duration_seconds",
      "legendFormat": "{{job}} {{instance}}"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "s",
      "color": {"mode": "palette-classic"},
      "custom": {
        "drawStyle": "line",
        "lineInterpolation": "linear",
        "lineWidth": 1,
        "fillOpacity": 10,
        "showPoints": "never",
        "spanNulls": true,
        "stacking": {"group": "A", "mode": "none"},
        "thresholdsStyle": {"mode": "off"}
      },
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {"color": "green", "value": null}
        ]
      }
    },
    "overrides": []
  },
  "options": {
    "legend": {
      "showLegend": true,
      "displayMode": "table",
      "placement": "right",
      "calcs": ["lastNotNull"]
    },
    "tooltip": {"mode": "multi", "sort": "desc"}
  }
},
|
||||
{
  "id": 14,
  "title": "TSDB Head Series & Chunks",
  "description": "Number of series and chunks currently held in the TSDB head block. These are plain counts, not byte sizes.",
  "type": "timeseries",
  "datasource": {"type": "prometheus", "uid": "Prometheus"},
  "gridPos": {"h": 9, "w": 12, "x": 0, "y": 23},
  "targets": [
    {
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "prometheus_tsdb_head_series",
      "legendFormat": "head series",
      "refId": "A"
    },
    {
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "prometheus_tsdb_head_chunks",
      "legendFormat": "head chunks",
      "refId": "B"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "color": {"mode": "palette-classic"},
      "custom": {
        "drawStyle": "line",
        "fillOpacity": 10,
        "lineInterpolation": "linear",
        "lineWidth": 1,
        "showPoints": "never",
        "spanNulls": true,
        "stacking": {"group": "A", "mode": "none"},
        "thresholdsStyle": {"mode": "off"}
      },
      "mappings": [],
      "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
      "unit": "short"
    },
    "overrides": []
  },
  "options": {
    "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
    "tooltip": {"mode": "multi", "sort": "desc"}
  }
},
|
||||
{
  "id": 15,
  "title": "Prometheus HTTP Request Duration",
  "description": "Mean duration of HTTP requests served by Prometheus over the last 5 minutes, aggregated across all handlers into a single series.",
  "type": "timeseries",
  "datasource": {"type": "prometheus", "uid": "Prometheus"},
  "gridPos": {"h": 9, "w": 12, "x": 12, "y": 23},
  "targets": [
    {
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "sum(rate(prometheus_http_request_duration_seconds_sum[5m])) / sum(rate(prometheus_http_request_duration_seconds_count[5m]))",
      "legendFormat": "avg HTTP req",
      "refId": "A"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "color": {"mode": "palette-classic"},
      "custom": {
        "drawStyle": "line",
        "fillOpacity": 10,
        "lineInterpolation": "linear",
        "lineWidth": 1,
        "showPoints": "never",
        "spanNulls": true,
        "stacking": {"group": "A", "mode": "none"},
        "thresholdsStyle": {"mode": "off"}
      },
      "mappings": [],
      "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
      "unit": "s"
    },
    "overrides": []
  },
  "options": {
    "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
    "tooltip": {"mode": "multi", "sort": "desc"}
  }
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 38,
|
||||
"style": "dark",
|
||||
"tags": ["k3s", "prometheus"],
|
||||
"templating": {"list": []},
|
||||
"time": {"from": "now-6h", "to": "now"},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Prometheus Self-Monitoring",
|
||||
"uid": "k3s-prometheus",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
Reference in New Issue
Block a user