Files
k3s-cluster/validate-scheduling.sh
Roger Oriol aa4793dd51 memory fixes
2026-02-02 20:47:09 +01:00

118 lines
3.6 KiB
Bash
Executable File
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Raspberry Pi K3s scheduling validation.
# Reports the cluster's node configuration and pod distribution via kubectl.

# --- Node overview, allocatable resources, labels and taints ---
printf '%s\n\n' "=== Kubernetes Node Analysis ==="

printf '%s\n' "1. Node Overview:"
kubectl get nodes -o wide

printf '\n%s\n' "2. Node Resource Capacity:"
# Keep every "Allocatable:" heading together with the five lines below it.
grep -A 5 "Allocatable:" < <(kubectl describe nodes)

printf '\n%s\n' "3. Node Labels and Taints:"
kubectl get nodes --show-labels
printf '\n'
# Reduce the node descriptions to their Name:/Taints: lines, then print each
# Name: line followed by the line that comes right after it.
kubectl describe nodes \
  | grep -E "(Name:|Taints:)" \
  | grep -A 1 "Name:"
printf '\n'
echo "=== Pod Distribution Analysis ==="
echo
echo "4. High-Resource Pods Location:"
echo "Checking where memory-intensive applications are scheduled..."
echo

#######################################
# List pods in wide format, or print a fallback message when nothing is listed.
# Arguments:
#   $1 - message printed when the (filtered) listing is empty
#   $2 - grep pattern a line must match to be kept; "" keeps every line,
#        including kubectl's header row
#   $@ - any remaining arguments are passed to `kubectl get pods` (e.g. -n <ns>)
# Outputs:
#   the listing, or the fallback message, on stdout; kubectl's own
#   diagnostics still go to stderr
#######################################
show_pods() {
  local fallback=$1 pattern=$2 listing
  shift 2
  # kubectl exits 0 and writes nothing to stdout when no pods exist, so the
  # fallback is chosen by empty output rather than by exit status.
  listing=$(kubectl get pods "$@" -o wide | grep -- "$pattern")
  if [ -n "$listing" ]; then
    printf '%s\n' "$listing"
  else
    echo "$fallback"
  fi
}

echo "n8n PostgreSQL pods:"
show_pods "No n8n postgres pods found" "postgres" -n n8n
echo
echo "Minecraft server pods:"
show_pods "No minecraft pods found" "" -n minecraft
echo
echo "OpenWebUI pods:"
# NOTE(review): no namespace flag is passed, so only the current namespace is
# searched -- confirm that is where OpenWebUI is deployed.
show_pods "No OpenWebUI pods found" "open-webui"
echo
echo "Phoenix pods:"
show_pods "No Phoenix pods found" "" -n phoenix
echo
echo "Jellyfin pods:"
show_pods "No Jellyfin pods found" "" -n jellyfin
echo
echo "Prometheus pods:"
show_pods "No Prometheus pods found" "prometheus" -n monitoring
echo
echo "=== Resource Usage ==="
echo
echo "5. Current Node Resource Usage:"
kubectl top nodes 2>/dev/null || echo "Metrics server not available - install with: kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml"
echo

#######################################
# Print the first 10 lines of `kubectl top pods` sorted by memory, or a
# notice when the command fails.
# Outputs: the listing or "Metrics server not available" on stdout
#######################################
show_top_memory_pods() {
  local usage
  # Capture first: in `kubectl ... | head || echo ...` the pipeline's status
  # is head's, so a kubectl failure would never reach the fallback.
  if ! usage=$(kubectl top pods --all-namespaces --sort-by=memory 2>/dev/null); then
    echo "Metrics server not available"
    return 0
  fi
  if [ -n "$usage" ]; then
    printf '%s\n' "$usage" | head -n 10
  fi
}

echo "6. Top Memory-Consuming Pods:"
show_top_memory_pods
echo
# --- Recent events whose line mentions Failed, Error or Warning ---
printf '%s\n\n' "=== Pod Events (Recent Issues) ==="
printf '%s\n' "7. Recent Pod Scheduling Events:"
# Events are sorted by last timestamp; only the final ten matching lines
# are shown.
kubectl get events --all-namespaces --sort-by='.lastTimestamp' \
  | grep -E "(Failed|Error|Warning)" \
  | tail -n 10
printf '\n'
echo "=== Validation Summary ==="
echo

#######################################
# Count pods per node across all namespaces.
# Outputs: one "<node>| <count>" row per node on stdout, node name padded to
#          24 columns; pods without an assigned node are grouped under
#          kubectl's "<none>" placeholder.
#######################################
pods_per_node() {
  # Ask kubectl for the node name only. Picking a positional column out of
  # `-o wide` output breaks when RESTARTS is shown as "3 (5h ago)", because
  # the extra words shift every later column.
  kubectl get pods --all-namespaces --no-headers \
    -o custom-columns=NODE:.spec.nodeName \
    | sort | uniq -c | awk '{printf "%-24s| %s\n", $2, $1}'
}

# Count pods per node
echo "8. Pod Distribution Per Node:"
# Header uses the same column format as the rows so the table lines up.
printf '%-24s| %s\n' "Node" "Pod Count"
echo "------------------------|---------"
pods_per_node
echo
echo "=== Recommendations ==="
echo
# Check if any high-resource pods are on wrong nodes
echo "9. Checking for Potential Issues:"

#######################################
# Print the name of the first node whose reported architecture is arm64.
# Outputs: a single node name on stdout, or nothing when no node matches
#######################################
first_arm64_node() {
  # kubectl's jsonpath output puts every match on ONE space-separated line,
  # so `head -1` would return all names; take the first field instead.
  kubectl get nodes -o jsonpath='{.items[?(@.status.nodeInfo.architecture=="arm64")].metadata.name}' \
    | awk '{print $1; exit}'
}

#######################################
# List the pods scheduled on a node whose namespace or name looks like one of
# the known memory-heavy applications.
# Arguments: $1 - exact node name
# Outputs:   matching `kubectl get pods -o wide` rows (no header) on stdout
#######################################
high_mem_pods_on_node() {
  local node=$1
  # The field selector makes the API server match the node name exactly,
  # instead of grepping for it anywhere in the row. "open-?webui" covers both
  # the "open-webui" and "openwebui" spellings.
  kubectl get pods --all-namespaces -o wide --no-headers \
    --field-selector "spec.nodeName=${node}" \
    | awk -v apps='postgres|minecraft|phoenix|jellyfin|prometheus|open-?webui' \
        '$1 ~ apps || $2 ~ apps'
}

# The Raspberry Pi is assumed to be the (first) arm64 node in the cluster.
RPI_NODE=$(first_arm64_node)
if [ -n "$RPI_NODE" ]; then
  echo "Detected Raspberry Pi node: $RPI_NODE"
  # Check if high-resource pods are on RPi
  HIGH_MEM_PODS=$(high_mem_pods_on_node "$RPI_NODE")
  if [ -n "$HIGH_MEM_PODS" ]; then
    echo "⚠️ WARNING: High-resource pods found on Raspberry Pi node:"
    echo "$HIGH_MEM_PODS"
    echo
    echo "These pods should be moved to more powerful nodes."
  else
    echo "✅ Good: No high-resource pods detected on Raspberry Pi node."
  fi
else
  echo " Could not auto-detect Raspberry Pi node. Please check manually."
fi
echo
# --- Closing remediation checklist (static text, no cluster access) ---
printf '%s\n' \
  "=== Next Steps ===" \
  "" \
  "If you see high-resource pods on your Raspberry Pi node:" \
  "1. Apply the node labels: kubectl label nodes <powerful-node> hardware=high-memory" \
  "2. Apply the taint: kubectl taint nodes <rpi-node> node-type=raspberry-pi:NoSchedule" \
  "3. Apply updated manifests with nodeSelectors" \
  "4. Delete problematic pods to force rescheduling" \
  "" \
  "See RASPBERRY_PI_SCHEDULING_FIX.md for detailed instructions."