Dashboards (provisioned via ConfigMaps into the Grafana pod, 'K3s Cluster' folder):
- Cluster Overview: per-namespace CPU/mem/net/fs, pod counts, pod health (KSM)
- Pods & Services: per-pod CPU/mem/net/fs, throttling, pod status, restarts, PVCs
- Nodes: per-node CPU%/mem%, load average, disk usage, network (node-exporter)
- Control Plane & API Server: request rate, latency p95, 5xx, kubelet/PLEG
- Prometheus Self-Monitoring: ingestion, series, scrape duration, memory

Exporters (auto-scraped via the existing kubernetes-service-endpoints job):
- kube-state-metrics: pod/deployment/PVC/replica state (kube_pod_status_phase, kube_pod_container_status_restarts_total, kube_persistentvolumeclaim_*)
- node-exporter (DaemonSet, hostNetwork): node_cpu_seconds_total, node_memory_*, node_filesystem_*, node_load*, node_network_*
313 lines
15 KiB
YAML
313 lines
15 KiB
YAML
apiVersion: v1
|
|
kind: ConfigMap
|
|
metadata:
|
|
name: grafana-dashboard-pods
|
|
namespace: monitoring
|
|
labels:
|
|
app: grafana
|
|
grafana_dashboard: "1"
|
|
data:
|
|
pods.json: |
|
|
{
|
|
"annotations": {"list": []},
|
|
"editable": true,
|
|
"graphTooltip": 1,
|
|
"id": null,
|
|
"links": [],
|
|
"liveNow": false,
|
|
"panels": [
|
|
{
|
|
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {"mode": "palette-classic"},
|
|
"custom": {
|
|
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
|
"stacking": {"group": "A", "mode": "normal"}, "thresholdsStyle": {"mode": "off"}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
|
"unit": "core"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {"h": 9, "w": 24, "x": 0, "y": 0},
|
|
"id": 1,
|
|
"options": {
|
|
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
|
"tooltip": {"mode": "multi", "sort": "desc"}
|
|
},
|
|
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(container_cpu_usage_seconds_total{container!=\"\",container!=\"POD\",namespace=~\"$namespace\"}[5m])) by (pod)", "legendFormat": "{{pod}}", "refId": "A"}],
|
|
"title": "CPU Usage per Pod",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {"mode": "palette-classic"},
|
|
"custom": {
|
|
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
|
"stacking": {"group": "A", "mode": "normal"}, "thresholdsStyle": {"mode": "off"}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
|
"unit": "bytes"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {"h": 9, "w": 24, "x": 0, "y": 9},
|
|
"id": 2,
|
|
"options": {
|
|
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
|
"tooltip": {"mode": "multi", "sort": "desc"}
|
|
},
|
|
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(container_memory_working_set_bytes{container!=\"\",container!=\"POD\",namespace=~\"$namespace\"}) by (pod)", "legendFormat": "{{pod}}", "refId": "A"}],
|
|
"title": "Memory Usage per Pod",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {"mode": "palette-classic"},
|
|
"custom": {
|
|
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
|
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
|
"unit": "Bps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {"h": 9, "w": 12, "x": 0, "y": 18},
|
|
"id": 3,
|
|
"options": {
|
|
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
|
"tooltip": {"mode": "multi", "sort": "desc"}
|
|
},
|
|
"targets": [
|
|
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[5m])) by (pod)", "legendFormat": "RX {{pod}}", "refId": "A"}
|
|
],
|
|
"title": "Network RX per Pod",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {"mode": "palette-classic"},
|
|
"custom": {
|
|
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
|
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
|
"unit": "Bps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {"h": 9, "w": 12, "x": 12, "y": 18},
|
|
"id": 4,
|
|
"options": {
|
|
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
|
"tooltip": {"mode": "multi", "sort": "desc"}
|
|
},
|
|
"targets": [
|
|
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[5m])) by (pod)", "legendFormat": "TX {{pod}}", "refId": "A"}
|
|
],
|
|
"title": "Network TX per Pod",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {"mode": "palette-classic"},
|
|
"custom": {
|
|
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
|
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
|
"unit": "bytes"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {"h": 9, "w": 12, "x": 0, "y": 27},
|
|
"id": 5,
|
|
"options": {
|
|
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
|
"tooltip": {"mode": "multi", "sort": "desc"}
|
|
},
|
|
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(container_fs_usage_bytes{namespace=~\"$namespace\"}) by (pod)", "legendFormat": "{{pod}}", "refId": "A"}],
|
|
"title": "Filesystem Usage per Pod",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {"mode": "palette-classic"},
|
|
"custom": {
|
|
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
|
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
|
"unit": "percent"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {"h": 9, "w": 12, "x": 12, "y": 27},
|
|
"id": 6,
|
|
"options": {
|
|
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
|
"tooltip": {"mode": "multi", "sort": "desc"}
|
|
},
|
|
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~\"$namespace\"}[5m])) by (pod) / sum(rate(container_cpu_cfs_periods_total{namespace=~\"$namespace\"}[5m])) by (pod) * 100", "legendFormat": "{{pod}}", "refId": "A"}],
|
|
"title": "CPU Throttling % per Pod",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {"mode": "thresholds"},
|
|
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 5}]},
|
|
"unit": "short"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {"h": 10, "w": 24, "x": 0, "y": 36},
|
|
"id": 7,
|
|
"options": {
|
|
"showHeader": true,
|
|
"cellHeight": "sm",
|
|
"footer": {"show": false, "reducer": ["sum"], "countRows": false, "fields": ""}
|
|
},
|
|
"pluginVersion": "10.2.3",
|
|
"targets": [
|
|
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum by (namespace, pod) (container_memory_working_set_bytes{container!=\"\",container!=\"POD\",namespace=~\"$namespace\"})", "format": "table", "instant": true, "refId": "A"},
|
|
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum by (namespace, pod) (rate(container_cpu_usage_seconds_total{container!=\"\",container!=\"POD\",namespace=~\"$namespace\"}[5m]))", "format": "table", "instant": true, "refId": "B"},
|
|
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum by (namespace, pod) (rate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[5m]))", "format": "table", "instant": true, "refId": "C"}
|
|
],
|
|
"title": "Pod Resource Summary (live)",
|
|
"type": "table",
|
|
"transformations": [
|
|
{"id": "merge", "options": {}},
|
|
{"id": "groupBy", "options": {"fields": {"Value": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "Value #B": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "Value #C": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "namespace": {"aggregations": [], "operation": "groupby"}, "pod": {"aggregations": [], "operation": "groupby"}}}},
|
|
{"id": "organize", "options": {"excludeByName": {"Time": true}, "renameByName": {"Value": "Memory (bytes)", "Value #B": "CPU (cores)", "Value #C": "Network RX (Bps)"}}}
|
|
]
|
|
},
|
|
{
|
|
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {"mode": "palette-classic"},
|
|
"custom": {
|
|
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
|
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
|
"unit": "short"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {"h": 9, "w": 12, "x": 0, "y": 46},
|
|
"id": 8,
|
|
"options": {
|
|
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
|
"tooltip": {"mode": "multi", "sort": "desc"}
|
|
},
|
|
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum by (namespace) (kube_pod_status_phase{phase=~\"Running|Pending|Failed\",namespace=~\"$namespace\"})", "legendFormat": "{{namespace}} {{phase}}", "refId": "A"}],
|
|
"title": "Pod Status by Namespace (KSM)",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {"mode": "palette-classic"},
|
|
"custom": {
|
|
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
|
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
|
"unit": "short"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {"h": 9, "w": 12, "x": 12, "y": 46},
|
|
"id": 9,
|
|
"options": {
|
|
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
|
"tooltip": {"mode": "multi", "sort": "desc"}
|
|
},
|
|
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum by (namespace) (increase(kube_pod_container_status_restarts_total{namespace=~\"$namespace\"}[1h]))", "legendFormat": "{{namespace}}", "refId": "A"}],
|
|
"title": "Container Restarts (last 1h)",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {"mode": "palette-classic"},
|
|
"custom": {
|
|
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
|
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
|
"unit": "bytes"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {"h": 9, "w": 24, "x": 0, "y": 55},
|
|
"id": 10,
|
|
"options": {
|
|
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
|
"tooltip": {"mode": "multi", "sort": "desc"}
|
|
},
|
|
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "kube_persistentvolumeclaim_resource_requests_storage_bytes{namespace=~\"$namespace\"}", "legendFormat": "{{namespace}}/{{persistentvolumeclaim}}", "refId": "A"}],
|
|
"title": "PVC Storage Requests by Claim (KSM)",
|
|
"type": "timeseries"
|
|
}
|
|
],
|
|
"refresh": "30s",
|
|
"schemaVersion": 38,
|
|
"style": "dark",
|
|
"tags": ["k3s", "pods"],
|
|
"templating": {
|
|
"list": [
|
|
{
|
|
"allValue": ".*",
|
|
"current": {"selected": true, "text": "All", "value": "$__all"},
|
|
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
|
"definition": "label_values(container_cpu_usage_seconds_total, namespace)",
|
|
"hide": 0,
|
|
"includeAll": true,
|
|
"multi": true,
|
|
"name": "namespace",
|
|
"options": [],
|
|
"query": "label_values(container_cpu_usage_seconds_total, namespace)",
|
|
"refresh": 2,
|
|
"regex": "",
|
|
"skipUrlSync": false,
|
|
"sort": 1,
|
|
"type": "query"
|
|
}
|
|
]
|
|
},
|
|
"time": {"from": "now-6h", "to": "now"},
|
|
"timepicker": {},
|
|
"timezone": "",
|
|
"title": "Pods & Services",
|
|
"uid": "k3s-pods",
|
|
"version": 2,
|
|
"weekStart": ""
|
|
}
|