fix hermes liveness probe
This commit is contained in:
@@ -24,18 +24,37 @@ spec:
|
|||||||
restartPolicy: OnFailure
|
restartPolicy: OnFailure
|
||||||
containers:
|
containers:
|
||||||
- name: seed
|
- name: seed
|
||||||
image: bitnami/kubectl:1.35
|
# alpine is tiny and always available; we install curl + download the
|
||||||
|
# right-arch kubectl binary at runtime (bitnami/kubectl tags are
|
||||||
|
# inconsistent across versions, so we avoid depending on them).
|
||||||
|
image: alpine:3.20
|
||||||
command: ["sh", "-c"]
|
command: ["sh", "-c"]
|
||||||
args:
|
args:
|
||||||
- |
|
- |
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
|
# Install curl, then download kubectl for this node's architecture.
|
||||||
|
apk add --no-cache curl
|
||||||
|
ARCH=$(uname -m)
|
||||||
|
case "$ARCH" in
|
||||||
|
x86_64) KARCH=amd64 ;;
|
||||||
|
aarch64) KARCH=arm64 ;;
|
||||||
|
armv7l) KARCH=arm ;;
|
||||||
|
*) echo "unsupported arch: $ARCH" >&2; exit 1 ;;
|
||||||
|
esac
|
||||||
|
echo "Downloading kubectl for linux/$KARCH ..."
|
||||||
|
curl -fsSL -o /usr/local/bin/kubectl \
|
||||||
|
"https://dl.k8s.io/release/v1.35.0/bin/linux/${KARCH}/kubectl"
|
||||||
|
chmod +x /usr/local/bin/kubectl
|
||||||
|
kubectl version --client
|
||||||
|
|
||||||
echo "Waiting for hermes pod to be Ready..."
|
echo "Waiting for hermes pod to be Ready..."
|
||||||
kubectl -n platform-engineer wait --for=condition=Ready pod -l app=hermes --timeout=300s || true
|
kubectl -n platform-engineer wait --for=condition=Ready pod -l app=hermes --timeout=300s || true
|
||||||
|
|
||||||
POD=$(kubectl -n platform-engineer get pod -l app=hermes -o jsonpath='{.items[0].metadata.name}')
|
POD=$(kubectl -n platform-engineer get pod -l app=hermes -o jsonpath='{.items[0].metadata.name}')
|
||||||
echo "Using pod: $POD"
|
echo "Using pod: $POD"
|
||||||
|
|
||||||
exists() { kubectl -n platform-engineer exec "$POD" -- hermes cron list 2>/dev/null | grep -qi "name=$1\| $1 "; }
|
exists() { kubectl -n platform-engineer exec "$POD" -- hermes cron list 2>/dev/null | grep -qi " $1 "; }
|
||||||
|
|
||||||
create() {
|
create() {
|
||||||
name="$1"; schedule="$2"; deliver="$3"; prompt="$4"
|
name="$1"; schedule="$2"; deliver="$3"; prompt="$4"
|
||||||
|
|||||||
@@ -128,12 +128,17 @@ spec:
|
|||||||
memory: "2Gi"
|
memory: "2Gi"
|
||||||
cpu: "1000m"
|
cpu: "1000m"
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
httpGet:
|
# Probe the dashboard port (9119, always enabled via HERMES_DASHBOARD=1
|
||||||
path: /health
|
# and binds 0.0.0.0). The gateway API on 8642 is off by default
|
||||||
port: 8642
|
# (API_SERVER_ENABLED not set), so 9119 is the reliable liveness signal.
|
||||||
initialDelaySeconds: 60
|
# s6 auto-restarts the gateway itself; this probe only catches a wedged
|
||||||
|
# container.
|
||||||
|
tcpSocket:
|
||||||
|
port: 9119
|
||||||
|
initialDelaySeconds: 90
|
||||||
periodSeconds: 30
|
periodSeconds: 30
|
||||||
failureThreshold: 3
|
timeoutSeconds: 5
|
||||||
|
failureThreshold: 5
|
||||||
securityContext:
|
securityContext:
|
||||||
allowPrivilegeEscalation: false
|
allowPrivilegeEscalation: false
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user