Dashboards (provisioned via ConfigMaps into the Grafana pod, 'K3s Cluster' folder):
- Cluster Overview: per-namespace CPU/mem/net/fs, pod counts, pod health (KSM)
- Pods & Services: per-pod CPU/mem/net/fs, throttling, pod status, restarts, PVCs
- Nodes: per-node CPU%/mem%, load average, disk usage, network (node-exporter)
- Control Plane & API Server: request rate, latency p95, 5xx, kubelet/PLEG
- Prometheus Self-Monitoring: ingestion, series, scrape duration, memory

Exporters (auto-scraped via existing kubernetes-service-endpoints job):
- kube-state-metrics: pod/deployment/PVC/replica state (kube_pod_status_phase, kube_pod_container_status_restarts_total, kube_persistentvolumeclaim_*)
- node-exporter (DaemonSet, hostNetwork): node_cpu_seconds_total, node_memory_*, node_filesystem_*, node_load*, node_network_*
119 lines
2.8 KiB
YAML
119 lines
2.8 KiB
YAML
---
# Identity under which the kube-state-metrics pod talks to the API server.
# Permissions are attached to this account by the ClusterRoleBinding of the
# same name.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: monitoring
  name: kube-state-metrics
---
# Read-only (list/watch) access to every resource kind kube-state-metrics
# exports metrics for. The role is cluster-scoped because the exporter watches
# objects in all namespaces.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kube-state-metrics
rules:
  # Core API group.
  - apiGroups: [""]
    resources:
      - configmaps
      - secrets
      - nodes
      - pods
      - services
      - resourcequotas
      - replicationcontrollers
      - limitranges
      - persistentvolumeclaims
      - persistentvolumes
      - namespaces
      - endpoints
    verbs: ["list", "watch"]
  - apiGroups: ["apps"]
    resources: ["statefulsets", "daemonsets", "deployments", "replicasets"]
    verbs: ["list", "watch"]
  - apiGroups: ["batch"]
    resources: ["cronjobs", "jobs"]
    verbs: ["list", "watch"]
  - apiGroups: ["autoscaling"]
    resources: ["horizontalpodautoscalers"]
    verbs: ["list", "watch"]
  - apiGroups: ["networking.k8s.io"]
    resources: ["ingresses", "networkpolicies"]
    verbs: ["list", "watch"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses", "volumeattachments"]
    verbs: ["list", "watch"]
  - apiGroups: ["certificates.k8s.io"]
    resources: ["certificatesigningrequests"]
    verbs: ["list", "watch"]
  # The rules below (and networkpolicies above) cover resources that
  # kube-state-metrics v2.x collects by default according to the upstream
  # standard manifests. Without them those collectors are denied by RBAC and
  # emit no metrics.
  # NOTE(review): if the Deployment ever restricts collectors via --resources,
  # trim this list to match.
  - apiGroups: ["policy"]
    resources: ["poddisruptionbudgets"]
    verbs: ["list", "watch"]
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    verbs: ["list", "watch"]
  - apiGroups: ["admissionregistration.k8s.io"]
    resources:
      - mutatingwebhookconfigurations
      - validatingwebhookconfigurations
    verbs: ["list", "watch"]
---
# Grants the kube-state-metrics ClusterRole to the ServiceAccount of the same
# name in the monitoring namespace.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: kube-state-metrics
subjects:
  - kind: ServiceAccount
    namespace: monitoring
    name: kube-state-metrics
roleRef:
  kind: ClusterRole
  name: kube-state-metrics
  apiGroup: rbac.authorization.k8s.io
---
# Single-replica kube-state-metrics exporter.
# Port 8080 (http-metrics) serves the Kubernetes object-state metrics;
# port 8081 (telemetry) serves the exporter's own telemetry.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kube-state-metrics
  namespace: monitoring
  labels:
    app: kube-state-metrics
spec:
  replicas: 1
  selector:
    matchLabels:
      app: kube-state-metrics
  template:
    metadata:
      labels:
        app: kube-state-metrics
    spec:
      serviceAccountName: kube-state-metrics
      containers:
        - name: kube-state-metrics
          image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.10.1
          ports:
            - containerPort: 8080
              name: http-metrics
            - containerPort: 8081
              name: telemetry
          # Restart the container if the metrics endpoint stops answering;
          # previously a hung process would stay "Running" forever.
          livenessProbe:
            httpGet:
              path: /healthz
              port: 8080
            initialDelaySeconds: 5
            timeoutSeconds: 5
          readinessProbe:
            httpGet:
              path: /
              port: 8081
            initialDelaySeconds: 5
            timeoutSeconds: 5
          # Hardened settings taken from the upstream standard manifest: the
          # exporter only needs outbound API access and writes nothing to
          # disk.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 65534
            seccompProfile:
              type: RuntimeDefault
          resources:
            requests:
              memory: "128Mi"
              cpu: "100m"
            limits:
              memory: "512Mi"
              cpu: "500m"
---
# Exposes the exporter inside the cluster. The prometheus.io/* annotations
# mark the Service for scraping on port 8080.
# NOTE(review): this presumes the Prometheus service-endpoints job honours
# these annotations; that config is not visible here.
kind: Service
apiVersion: v1
metadata:
  namespace: monitoring
  name: kube-state-metrics
  annotations:
    prometheus.io/port: '8080'
    prometheus.io/scrape: 'true'
  labels:
    app: kube-state-metrics
spec:
  ports:
    - name: http-metrics
      targetPort: http-metrics
      port: 8080
    - name: telemetry
      targetPort: telemetry
      port: 8081
  selector:
    app: kube-state-metrics