From bf1387dc3e5b0896dd9b7feb83e3d54dd160e77f Mon Sep 17 00:00:00 2001 From: Roger Oriol Date: Fri, 26 Jun 2026 19:34:27 +0200 Subject: [PATCH] monitoring: add Grafana dashboards + kube-state-metrics & node-exporter Dashboards (provisioned via ConfigMaps into Grafana pod, 'K3s Cluster' folder): - Cluster Overview: per-namespace CPU/mem/net/fs, pod counts, pod health (KSM) - Pods & Services: per-pod CPU/mem/net/fs, throttling, pod status, restarts, PVCs - Nodes: per-node CPU%/mem%, load average, disk usage, network (node-exporter) - Control Plane & API Server: request rate, latency p95, 5xx, kubelet/PLEG - Prometheus Self-Monitoring: ingestion, series, scrape duration, memory Exporters (auto-scraped via existing kubernetes-service-endpoints job): - kube-state-metrics: pod/deployment/PVC/replica state (kube_pod_status_phase, kube_pod_container_status_restarts_total, kube_persistentvolumeclaim_*) - node-exporter (DaemonSet, hostNetwork): node_cpu_seconds_total, node_memory_*, node_filesystem_*, node_load*, node_network_* --- .../grafana-dashboard-cluster-overview.yaml | 331 ++++++++++++++++++ .../grafana-dashboard-control-plane.yaml | 209 +++++++++++ monitoring/grafana-dashboard-nodes.yaml | 279 +++++++++++++++ monitoring/grafana-dashboard-pods.yaml | 312 +++++++++++++++++ monitoring/grafana-dashboard-prometheus.yaml | 218 ++++++++++++ monitoring/grafana-dashboard-provider.yaml | 20 ++ monitoring/grafana-deployment.yaml | 30 ++ monitoring/kube-state-metrics.yaml | 118 +++++++ monitoring/node-exporter.yaml | 112 ++++++ 9 files changed, 1629 insertions(+) create mode 100644 monitoring/grafana-dashboard-cluster-overview.yaml create mode 100644 monitoring/grafana-dashboard-control-plane.yaml create mode 100644 monitoring/grafana-dashboard-nodes.yaml create mode 100644 monitoring/grafana-dashboard-pods.yaml create mode 100644 monitoring/grafana-dashboard-prometheus.yaml create mode 100644 monitoring/grafana-dashboard-provider.yaml create mode 100644 
monitoring/kube-state-metrics.yaml create mode 100644 monitoring/node-exporter.yaml diff --git a/monitoring/grafana-dashboard-cluster-overview.yaml b/monitoring/grafana-dashboard-cluster-overview.yaml new file mode 100644 index 0000000..ac986ef --- /dev/null +++ b/monitoring/grafana-dashboard-cluster-overview.yaml @@ -0,0 +1,331 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboard-cluster-overview + namespace: monitoring + labels: + app: grafana + grafana_dashboard: "1" +data: + cluster-overview.json: | + { + "annotations": {"list": []}, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null} + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": {"h": 5, "w": 4, "x": 0, "y": 0}, + "id": 1, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, + "textMode": "auto" + }, + "pluginVersion": "10.2.3", + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "time() - max(process_start_time_seconds{job=\"prometheus\"})", "refId": "A"}], + "title": "Prometheus Uptime", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "red", "value": null}, + {"color": "green", "value": 1} + ] + } + }, + "overrides": [] + }, + "gridPos": {"h": 5, "w": 4, "x": 4, "y": 0}, + "id": 2, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "horizontal", + "reduceOptions": {"calcs": 
["lastNotNull"], "fields": "", "values": false}, + "textMode": "value_and_name" + }, + "pluginVersion": "10.2.3", + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(kubelet_running_pods)", "refId": "A"}], + "title": "Running Pods (total)", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null} + ] + } + }, + "overrides": [] + }, + "gridPos": {"h": 5, "w": 4, "x": 8, "y": 0}, + "id": 3, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "horizontal", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, + "textMode": "value_and_name" + }, + "pluginVersion": "10.2.3", + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(kubelet_running_containers)", "refId": "A"}], + "title": "Running Containers", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "mappings": [ + {"options": {"0": {"text": "Down", "color": "red"}, "1": {"text": "Up", "color": "green"}}, "type": "value"} + ], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "red", "value": null}, + {"color": "green", "value": 1} + ] + } + }, + "overrides": [] + }, + "gridPos": {"h": 5, "w": 12, "x": 12, "y": 0}, + "id": 4, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "horizontal", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, + "textMode": "value_and_name" + }, + "pluginVersion": "10.2.3", + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "up{job=\"kubernetes-apiservers\"}", "refId": "A"}, {"datasource": {"type": "prometheus",
"uid": "Prometheus"}, "expr": "up{job=\"kubernetes-nodes\"}", "refId": "B"}], + "title": "API Server & Node Scrape Targets", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": {"legend": false, "tooltip": false, "viz": false}, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": {"type": "linear"}, + "showPoints": "never", + "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, + "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 0, "y": 5}, + "id": 10, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(container_memory_working_set_bytes{container!=\"\",container!=\"POD\"}) by (namespace)", "legendFormat": "{{namespace}}", "refId": "A"}], + "title": "Memory Usage by Namespace", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "core" + }, +
"overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 12, "y": 5}, + "id": 11, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(container_cpu_usage_seconds_total{container!=\"\",container!=\"POD\"}[5m])) by (namespace)", "legendFormat": "{{namespace}}", "refId": "A"}], + "title": "CPU Usage by Namespace", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 0, "y": 14}, + "id": 12, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [ + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(container_network_receive_bytes_total[5m])) by (namespace)", "legendFormat": "RX {{namespace}}", "refId": "A"}, + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(container_network_transmit_bytes_total[5m])) by (namespace)", "legendFormat": "TX {{namespace}}", "refId": "B"} + ], + "title": "Network RX/TX by Namespace", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": 
"linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 12, "y": 14}, + "id": 13, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(container_fs_usage_bytes) by (instance)", "legendFormat": "{{instance}}", "refId": "A"}], + "title": "Filesystem Usage by Node", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 24, "x": 0, "y": 23}, + "id": 20, + "options": { + "showHeader": true, + "cellHeight": "sm", + "footer": {"show": false, "reducer": ["sum"], "countRows": false, "fields": ""} + }, + "pluginVersion": "10.2.3", + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sort_desc(sum(container_memory_working_set_bytes{container!=\"\",container!=\"POD\"}) by (namespace,pod))", "format": "table", "instant": true, "refId": "A"}], + "title": "Pods by Memory (live)", + "type": "table", + "transformations": [ + {"id": "organize", "options": {"excludeByName": {"Time": true}, "renameByName": {"Value": "Memory (bytes)"}}} + ] + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "orange", "value": 1}, {"color": "red", 
"value": 5}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 24, "x": 0, "y": 32}, + "id": 30, + "options": { + "showHeader": true, + "cellHeight": "sm", + "footer": {"show": false, "reducer": ["sum"], "countRows": false, "fields": ""} + }, + "pluginVersion": "10.2.3", + "targets": [ + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(kube_pod_status_phase{phase=\"Running\"}) by (namespace)", "format": "table", "instant": true, "refId": "A"}, + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(kube_pod_status_phase{phase=\"Pending\"}) by (namespace)", "format": "table", "instant": true, "refId": "B"}, + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(kube_pod_status_phase{phase=\"Failed\"}) by (namespace)", "format": "table", "instant": true, "refId": "C"}, + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(increase(kube_pod_container_status_restarts_total[1h])) by (namespace)", "format": "table", "instant": true, "refId": "D"} + ], + "title": "Pod Health by Namespace (KSM)", + "type": "table", + "transformations": [ + {"id": "merge", "options": {}}, + {"id": "groupBy", "options": {"fields": {"Value #A": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "Value #B": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "Value #C": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "Value #D": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "namespace": {"aggregations": [], "operation": "groupby"}}}}, + {"id": "organize", "options": {"excludeByName": {"Time": true}, "renameByName": {"Value #A (lastNotNull)": "Running", "Value #B (lastNotNull)": "Pending", "Value #C (lastNotNull)": "Failed", "Value #D (lastNotNull)": "Restarts (1h)"}}} + ] + } + ], + "refresh": "30s", + "schemaVersion": 38, + "style": "dark", + "tags": ["k3s", "overview"], + "templating": {"list": []}, + "time": {"from": "now-6h", "to": "now"}, + "timepicker": {}, + "timezone": "", + "title": "Cluster
Overview", + "uid": "k3s-cluster-overview", + "version": 2, + "weekStart": "" + } diff --git a/monitoring/grafana-dashboard-control-plane.yaml b/monitoring/grafana-dashboard-control-plane.yaml new file mode 100644 index 0000000..5fdf8b7 --- /dev/null +++ b/monitoring/grafana-dashboard-control-plane.yaml @@ -0,0 +1,209 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboard-control-plane + namespace: monitoring + labels: + app: grafana + grafana_dashboard: "1" +data: + control-plane.json: | + { + "annotations": {"list": []}, + "editable": true, + "graphTooltip": 1, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 0, "y": 0}, + "id": 1, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(apiserver_request_total[5m])) by (verb)", "legendFormat": "{{verb}}", "refId": "A"}], + "title": "API Server Requests by Verb", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": 
{"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 12, "y": 0}, + "id": 2, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "histogram_quantile(0.95, sum(rate(apiserver_request_duration_seconds_bucket[5m])) by (le, verb))", "legendFormat": "p95 {{verb}}", "refId": "A"}], + "title": "API Server Request Latency p95", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 0, "y": 9}, + "id": 3, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(apiserver_request_total{code=~\"5..\"}[5m])) by (verb)", "legendFormat": "{{verb}}", "refId": "A"}], + "title": "API Server 5xx Errors", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + 
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 12, "y": 9}, + "id": 4, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(deriv(kubelet_container_log_filesystem_used_bytes[5m]))", "legendFormat": "log fs", "refId": "A"}, {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "histogram_quantile(0.95, sum(rate(kubelet_pod_start_duration_seconds_bucket[5m])) by (le))", "legendFormat": "pod start p95", "refId": "B"}], + "title": "Kubelet Pod Start Latency", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 0, "y": 18}, + "id": 5, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "histogram_quantile(0.95, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket[5m])) by (le, instance))", "legendFormat": "{{instance}}", "refId": "A"}], + "title": "Kubelet Cgroup Manager Duration p95", + "type": "timeseries" + }, + { +
"datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 12, "y": 18}, + "id": 6, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "rate(kubelet_pleg_relist_duration_seconds_count[5m])", "legendFormat": "relists/s {{instance}}", "refId": "A"}], + "title": "Kubelet PLEG Relist Rate", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "mappings": [ + {"options": {"0": {"text": "Down", "color": "red"}, "1": {"text": "Up", "color": "green"}}, "type": "value"} + ], + "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]} + }, + "overrides": [] + }, + "gridPos": {"h": 6, "w": 24, "x": 0, "y": 27}, + "id": 7, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "horizontal", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, + "textMode": "value_and_name" + }, + "pluginVersion": "10.2.3", + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "up", "refId": "A"}], + "title": "All Scrape Targets Status", + "type": "stat" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "style": "dark", + "tags": ["k3s", 
"control-plane"], + "templating": {"list": []}, + "time": {"from": "now-6h", "to": "now"}, + "timepicker": {}, + "timezone": "", + "title": "Control Plane & API Server", + "uid": "k3s-control-plane", + "version": 1, + "weekStart": "" + } diff --git a/monitoring/grafana-dashboard-nodes.yaml b/monitoring/grafana-dashboard-nodes.yaml new file mode 100644 index 0000000..b9f249d --- /dev/null +++ b/monitoring/grafana-dashboard-nodes.yaml @@ -0,0 +1,279 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboard-nodes + namespace: monitoring + labels: + app: grafana + grafana_dashboard: "1" +data: + nodes.json: | + { + "annotations": {"list": []}, + "editable": true, + "graphTooltip": 1, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 6, "w": 6, "x": 0, "y": 0}, + "id": 1, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, + "textMode": "value_and_name" + }, + "pluginVersion": "10.2.3", + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "kubelet_running_pods", "refId": "A"}, {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "kubelet_running_containers", "refId": "B"}], + "title": "Pods / Containers per Node", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "orange", "value": 70}, {"color": "red", "value": 90}]}, + "unit": "percent", + "min": 0, + "max": 100 + }, + "overrides": [] + 
}, + "gridPos": {"h": 6, "w": 18, "x": 6, "y": 0}, + "id": 2, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, + "textMode": "value_and_name" + }, + "pluginVersion": "10.2.3", + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "100 - (avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", "legendFormat": "{{instance}}", "refId": "A"}], + "title": "Node CPU Usage %", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "percent", + "min": 0, + "max": 100 + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 0, "y": 6}, + "id": 3, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "100 - (avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", "legendFormat": "{{instance}}", "refId": "A"}], + "title": "Node CPU Usage % (over time)", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, 
"thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "percent", + "min": 0, + "max": 100 + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 12, "y": 6}, + "id": 4, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100", "legendFormat": "{{instance}}", "refId": "A"}], + "title": "Node Memory Usage %", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 0, "y": 15}, + "id": 5, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(container_memory_working_set_bytes{container!=\"\",container!=\"POD\"}) by (instance)", "legendFormat": "used {{instance}}", "refId": "A"}, {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "node_memory_MemTotal_bytes", "legendFormat": "total {{instance}}", "refId": "B"}], + "title": "Node Memory (used vs total)", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + 
"fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 12, "y": 15}, + "id": 6, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [ + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum by (instance) (rate(node_network_receive_bytes_total{device!~\"lo|veth.*|docker.*|br-.*|cni.*|flannel.*\"}[5m]))", "legendFormat": "RX {{instance}}", "refId": "A"}, + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum by (instance) (rate(node_network_transmit_bytes_total{device!~\"lo|veth.*|docker.*|br-.*|cni.*|flannel.*\"}[5m]))", "legendFormat": "TX {{instance}}", "refId": "B"} + ], + "title": "Node Network Traffic", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "percent", + "min": 0, + "max": 100 + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 0, "y": 24}, + "id": 7, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": 
"multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "(1 - (node_filesystem_avail_bytes{fstype!~\"tmpfs|overlay|squashfs\"} / node_filesystem_size_bytes{fstype!~\"tmpfs|overlay|squashfs\"})) * 100", "legendFormat": "{{instance}} {{mountpoint}}", "refId": "A"}], + "title": "Node Disk Usage %", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 12, "y": 24}, + "id": 8, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [ + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "node_load1", "legendFormat": "1m {{instance}}", "refId": "A"}, + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "node_load5", "legendFormat": "5m {{instance}}", "refId": "B"}, + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "node_load15", "legendFormat": "15m {{instance}}", "refId": "C"} + ], + "title": "Node Load Average", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 24, "x": 0, "y": 33}, + "id": 9, + "options": { + "showHeader": true, + "cellHeight": 
"sm", + "footer": {"show": false, "reducer": ["sum"], "countRows": false, "fields": ""} + }, + "pluginVersion": "10.2.3", + "targets": [ + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "kubelet_running_pods", "format": "table", "instant": true, "refId": "A"}, + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "kubelet_running_containers", "format": "table", "instant": true, "refId": "B"}, + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "100 - (avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", "format": "table", "instant": true, "refId": "C"}, + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100", "format": "table", "instant": true, "refId": "D"} + ], + "title": "Node Summary (live)", + "type": "table", + "transformations": [ + {"id": "merge", "options": {}}, + {"id": "groupBy", "options": {"fields": {"Value #A": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "Value #B": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "Value #C": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "Value #D": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "instance": {"aggregations": [], "operation": "groupby"}}}}, + {"id": "organize", "options": {"excludeByName": {"Time": true}, "renameByName": {"Value #A (lastNotNull)": "Pods", "Value #B (lastNotNull)": "Containers", "Value #C (lastNotNull)": "CPU %", "Value #D (lastNotNull)": "Memory %"}}} + ] + } + ], + "refresh": "30s", + "schemaVersion": 38, + "style": "dark", + "tags": ["k3s", "nodes"], + "templating": {"list": []}, + "time": {"from": "now-6h", "to": "now"}, + "timepicker": {}, + "timezone": "", + "title": "Nodes", + "uid": "k3s-nodes", + "version": 2, + "weekStart": "" + } diff --git a/monitoring/grafana-dashboard-pods.yaml b/monitoring/grafana-dashboard-pods.yaml new file mode 100644 index 0000000..0b0a548 --- /dev/null +++
b/monitoring/grafana-dashboard-pods.yaml @@ -0,0 +1,312 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboard-pods + namespace: monitoring + labels: + app: grafana + grafana_dashboard: "1" +data: + pods.json: | + { + "annotations": {"list": []}, + "editable": true, + "graphTooltip": 1, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "normal"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "core" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 24, "x": 0, "y": 0}, + "id": 1, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(container_cpu_usage_seconds_total{container!=\"\",container!=\"POD\",namespace=~\"$namespace\"}[5m])) by (pod)", "legendFormat": "{{pod}}", "refId": "A"}], + "title": "CPU Usage per Pod", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "normal"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 24, "x": 0, "y": 9}, + 
"id": 2, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(container_memory_working_set_bytes{container!=\"\",container!=\"POD\",namespace=~\"$namespace\"}) by (pod)", "legendFormat": "{{pod}}", "refId": "A"}], + "title": "Memory Usage per Pod", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 0, "y": 18}, + "id": 3, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [ + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[5m])) by (pod)", "legendFormat": "RX {{pod}}", "refId": "A"} + ], + "title": "Network RX per Pod", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + 
"unit": "Bps" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 12, "y": 18}, + "id": 4, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [ + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[5m])) by (pod)", "legendFormat": "TX {{pod}}", "refId": "A"} + ], + "title": "Network TX per Pod", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 0, "y": 27}, + "id": 5, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(container_fs_usage_bytes{namespace=~\"$namespace\"}) by (pod)", "legendFormat": "{{pod}}", "refId": "A"}], + "title": "Filesystem Usage per Pod", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": 
"absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 12, "y": 27}, + "id": 6, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~\"$namespace\"}[5m])) by (pod) / sum(rate(container_cpu_cfs_periods_total{namespace=~\"$namespace\"}[5m])) by (pod) * 100", "legendFormat": "{{pod}}", "refId": "A"}], + "title": "CPU Throttling % per Pod", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 5}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 10, "w": 24, "x": 0, "y": 36}, + "id": 7, + "options": { + "showHeader": true, + "cellHeight": "sm", + "footer": {"show": false, "reducer": ["sum"], "countRows": false, "fields": ""} + }, + "pluginVersion": "10.2.3", + "targets": [ + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum by (namespace, pod) (container_memory_working_set_bytes{container!=\"\",container!=\"POD\",namespace=~\"$namespace\"})", "format": "table", "instant": true, "refId": "A"}, + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum by (namespace, pod) (rate(container_cpu_usage_seconds_total{container!=\"\",container!=\"POD\",namespace=~\"$namespace\"}[5m]))", "format": "table", "instant": true, "refId": "B"}, + {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum by (namespace, pod) (rate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[5m]))", "format": "table", 
"instant": true, "refId": "C"} + ], + "title": "Pod Resource Summary (live)", + "type": "table", + "transformations": [ + {"id": "merge", "options": {}}, + {"id": "groupBy", "options": {"fields": {"Value": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "Value #B": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "Value #C": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "namespace": {"aggregations": [], "operation": "groupby"}, "pod": {"aggregations": [], "operation": "groupby"}}}}, + {"id": "organize", "options": {"excludeByName": {"Time": true}, "renameByName": {"Value": "Memory (bytes)", "Value #B": "CPU (cores)", "Value #C": "Network RX (Bps)"}}} + ] + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 0, "y": 46}, + "id": 8, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum by (namespace) (kube_pod_status_phase{phase=~\"Running|Pending|Failed\",namespace=~\"$namespace\"})", "legendFormat": "{{namespace}} {{phase}}", "refId": "A"}], + "title": "Pod Status by Namespace (KSM)", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": 
"linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 12, "y": 46}, + "id": 9, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum by (namespace) (increase(kube_pod_container_status_restarts_total{namespace=~\"$namespace\"}[1h]))", "legendFormat": "{{namespace}}", "refId": "A"}], + "title": "Container Restarts (last 1h)", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": { + "drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, + "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"} + }, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 24, "x": 0, "y": 55}, + "id": 10, + "options": { + "legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, + "tooltip": {"mode": "multi", "sort": "desc"} + }, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "kube_persistentvolumeclaim_resource_requests_storage_bytes{namespace=~\"$namespace\"}", "legendFormat": "{{namespace}}/{{persistentvolumeclaim}}", "refId": "A"}], + "title": "PVC Storage Requests by Claim (KSM)", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "style": "dark", + "tags": 
["k3s", "pods"], + "templating": { + "list": [ + { + "allValue": ".*", + "current": {"selected": true, "text": "All", "value": "$__all"}, + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "definition": "label_values(container_cpu_usage_seconds_total, namespace)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "namespace", + "options": [], + "query": "label_values(container_cpu_usage_seconds_total, namespace)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": {"from": "now-6h", "to": "now"}, + "timepicker": {}, + "timezone": "", + "title": "Pods & Services", + "uid": "k3s-pods", + "version": 2, + "weekStart": "" + } diff --git a/monitoring/grafana-dashboard-prometheus.yaml b/monitoring/grafana-dashboard-prometheus.yaml new file mode 100644 index 0000000..eff62b4 --- /dev/null +++ b/monitoring/grafana-dashboard-prometheus.yaml @@ -0,0 +1,218 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboard-prometheus + namespace: monitoring + labels: + app: grafana + grafana_dashboard: "1" +data: + prometheus.json: | + { + "annotations": {"list": []}, + "editable": true, + "graphTooltip": 1, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]}, + "mappings": [{"options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}, "type": "value"}] + }, + "overrides": [] + }, + "gridPos": {"h": 5, "w": 6, "x": 0, "y": 0}, + "id": 1, + "options": {"colorMode": "background", "graphMode": "none", "justifyMode": "center", "orientation": "horizontal", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "value"}, + "pluginVersion": "10.2.3", + "targets": [{"datasource": 
{"type": "prometheus", "uid": "Prometheus"}, "expr": "up{job=\"prometheus\"}", "refId": "A"}], + "title": "Prometheus Status", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": {"h": 5, "w": 6, "x": 6, "y": 0}, + "id": 2, + "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "pluginVersion": "10.2.3", + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "process_resident_memory_bytes{job=\"prometheus\"}", "refId": "A"}], + "title": "Prometheus RSS Memory", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 5, "w": 6, "x": 12, "y": 0}, + "id": 3, + "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "pluginVersion": "10.2.3", + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "prometheus_tsdb_head_series", "refId": "A"}], + "title": "Active Series", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 5, "w": 6, "x": 18, "y": 0}, + "id": 4, + "options": {"colorMode": 
"value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"}, + "pluginVersion": "10.2.3", + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "count(up)", "refId": "A"}], + "title": "Scrape Targets", + "type": "stat" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 0, "y": 5}, + "id": 10, + "options": {"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "process_resident_memory_bytes{job=\"prometheus\"}", "legendFormat": "RSS", "refId": "A"}, {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "prometheus_tsdb_head_memory_postings_total", "legendFormat": "postings", "refId": "B"}], + "title": "Prometheus Memory", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "core" + }, + "overrides": [] + 
}, + "gridPos": {"h": 9, "w": 12, "x": 12, "y": 5}, + "id": 11, + "options": {"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "rate(process_cpu_seconds_total{job=\"prometheus\"}[5m])", "legendFormat": "prometheus", "refId": "A"}], + "title": "Prometheus CPU", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 0, "y": 14}, + "id": 12, + "options": {"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "rate(prometheus_tsdb_head_samples_appended_total[5m])", "legendFormat": "samples/s", "refId": "A"}], + "title": "Ingestion Rate (samples/s)", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 
12, "x": 12, "y": 14}, + "id": 13, + "options": {"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "scrape_duration_seconds", "legendFormat": "{{job}} {{instance}}", "refId": "A"}], + "title": "Scrape Duration by Job", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 0, "y": 23}, + "id": 14, + "options": {"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "prometheus_tsdb_head_series", "legendFormat": "head series", "refId": "A"}, {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "prometheus_tsdb_head_chunks", "legendFormat": "head chunks", "refId": "B"}], + "title": "TSDB Head Series & Chunks", + "type": "timeseries" + }, + { + "datasource": {"type": "prometheus", "uid": "Prometheus"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}}, + "mappings": [], + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": 
null}]}, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": {"h": 9, "w": 12, "x": 12, "y": 23}, + "id": 15, + "options": {"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "desc"}}, + "targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "rate(prometheus_http_request_duration_seconds_sum[5m]) / rate(prometheus_http_request_duration_seconds_count[5m])", "legendFormat": "avg HTTP req", "refId": "A"}], + "title": "Prometheus HTTP Request Duration", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "style": "dark", + "tags": ["k3s", "prometheus"], + "templating": {"list": []}, + "time": {"from": "now-6h", "to": "now"}, + "timepicker": {}, + "timezone": "", + "title": "Prometheus Self-Monitoring", + "uid": "k3s-prometheus", + "version": 1, + "weekStart": "" + } diff --git a/monitoring/grafana-dashboard-provider.yaml b/monitoring/grafana-dashboard-provider.yaml new file mode 100644 index 0000000..6bed1e2 --- /dev/null +++ b/monitoring/grafana-dashboard-provider.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboard-provider + namespace: monitoring + labels: + app: grafana +data: + provider.yaml: | + apiVersion: 1 + providers: + - name: 'k3s-dashboards' + orgId: 1 + folder: 'K3s Cluster' + type: file + disableDeletion: false + updateIntervalSeconds: 30 + allowUiUpdates: true + options: + path: /var/lib/grafana/dashboards diff --git a/monitoring/grafana-deployment.yaml b/monitoring/grafana-deployment.yaml index 75a8c86..e7015a1 100644 --- a/monitoring/grafana-deployment.yaml +++ b/monitoring/grafana-deployment.yaml @@ -33,6 +33,18 @@ spec: mountPath: /var/lib/grafana - name: grafana-datasources mountPath: /etc/grafana/provisioning/datasources + - name: grafana-dashboard-provider + mountPath: /etc/grafana/provisioning/dashboards + - name: dashboards-cluster-overview + mountPath: 
/var/lib/grafana/dashboards/cluster-overview + - name: dashboards-pods + mountPath: /var/lib/grafana/dashboards/pods + - name: dashboards-nodes + mountPath: /var/lib/grafana/dashboards/nodes + - name: dashboards-control-plane + mountPath: /var/lib/grafana/dashboards/control-plane + - name: dashboards-prometheus + mountPath: /var/lib/grafana/dashboards/prometheus resources: requests: memory: "256Mi" @@ -47,3 +59,21 @@ spec: - name: grafana-datasources configMap: name: grafana-datasources + - name: grafana-dashboard-provider + configMap: + name: grafana-dashboard-provider + - name: dashboards-cluster-overview + configMap: + name: grafana-dashboard-cluster-overview + - name: dashboards-pods + configMap: + name: grafana-dashboard-pods + - name: dashboards-nodes + configMap: + name: grafana-dashboard-nodes + - name: dashboards-control-plane + configMap: + name: grafana-dashboard-control-plane + - name: dashboards-prometheus + configMap: + name: grafana-dashboard-prometheus diff --git a/monitoring/kube-state-metrics.yaml b/monitoring/kube-state-metrics.yaml new file mode 100644 index 0000000..fc20bb2 --- /dev/null +++ b/monitoring/kube-state-metrics.yaml @@ -0,0 +1,127 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kube-state-metrics + namespace: monitoring +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kube-state-metrics +rules: + - apiGroups: [""] + resources: + - configmaps + - secrets + - nodes + - pods + - services + - resourcequotas + - replicationcontrollers + - limitranges + - persistentvolumeclaims + - persistentvolumes + - namespaces + - endpoints + verbs: ["list", "watch"] + - apiGroups: ["apps"] + resources: ["statefulsets", "daemonsets", "deployments", "replicasets"] + verbs: ["list", "watch"] + - apiGroups: ["batch"] + resources: ["cronjobs", "jobs"] + verbs: ["list", "watch"] + - apiGroups: ["autoscaling"] + resources: ["horizontalpodautoscalers"] + verbs: ["list", "watch"] + - apiGroups: 
["networking.k8s.io"] + resources: ["ingresses", "networkpolicies"] + verbs: ["list", "watch"] + - apiGroups: ["storage.k8s.io"] + resources: ["storageclasses", "volumeattachments"] + verbs: ["list", "watch"] + - apiGroups: ["certificates.k8s.io"] + resources: ["certificatesigningrequests"] + verbs: ["list", "watch"] + - apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["list", "watch"] + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["list", "watch"] + - apiGroups: ["admissionregistration.k8s.io"] + resources: ["mutatingwebhookconfigurations", "validatingwebhookconfigurations"] + verbs: ["list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kube-state-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kube-state-metrics +subjects: + - kind: ServiceAccount + name: kube-state-metrics + namespace: monitoring +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: kube-state-metrics + namespace: monitoring + labels: + app: kube-state-metrics +spec: + replicas: 1 + selector: + matchLabels: + app: kube-state-metrics + template: + metadata: + labels: + app: kube-state-metrics + spec: + serviceAccountName: kube-state-metrics + containers: + - name: kube-state-metrics + image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.10.1 + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 8081 + name: telemetry + readinessProbe: + httpGet: + path: / + port: 8081 + initialDelaySeconds: 5 + timeoutSeconds: 5 + resources: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "512Mi" + cpu: "500m" +--- +apiVersion: v1 +kind: Service +metadata: + name: kube-state-metrics + namespace: monitoring + labels: + app: kube-state-metrics + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8080" +spec: + selector: + app: kube-state-metrics + ports: + - name: http-metrics + port: 8080 + targetPort: 
http-metrics + - name: telemetry + port: 8081 + targetPort: telemetry diff --git a/monitoring/node-exporter.yaml b/monitoring/node-exporter.yaml new file mode 100644 index 0000000..6469393 --- /dev/null +++ b/monitoring/node-exporter.yaml @@ -0,0 +1,112 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: node-exporter + namespace: monitoring +--- 
+apiVersion: v1 +kind: Service +metadata: + name: node-exporter + namespace: monitoring + labels: + app: node-exporter + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9100" +spec: + selector: + app: node-exporter + ports: + - name: metrics + port: 9100 + targetPort: 9100 +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: node-exporter +rules: + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: node-exporter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: node-exporter +subjects: + - kind: ServiceAccount + name: node-exporter + namespace: monitoring +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: node-exporter + namespace: monitoring + labels: + app: node-exporter +spec: + selector: + matchLabels: + app: node-exporter + template: + metadata: + labels: + app: node-exporter + spec: + serviceAccountName: node-exporter + hostPID: true + hostNetwork: true + tolerations: + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + containers: + - name: node-exporter + image: prom/node-exporter:v1.7.0 + args: + - --path.procfs=/host/proc + - --path.sysfs=/host/sys + - --path.rootfs=/host/root + - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+)($|/) + - --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$ + ports: + - containerPort: 9100 + hostPort: 9100 + name: metrics + volumeMounts: + - name: proc + mountPath: /host/proc + readOnly: true + - name: sys + mountPath: /host/sys + readOnly: true + - name: root + mountPath: /host/root + readOnly: true + resources: + requests: + memory: "64Mi" + 
cpu: "50m" + limits: + memory: "128Mi" + cpu: "200m" + volumes: + - name: proc + hostPath: + path: /proc + - name: sys + hostPath: + path: /sys + - name: root + hostPath: + path: /