Compare commits

..

4 Commits

Author SHA1 Message Date
Roger Oriol
734962d198 fix hermes liveness probe 2026-06-28 00:43:09 +02:00
Roger Oriol
4d9195b32d use hermes stock image for platform engineer 2026-06-27 21:00:03 +02:00
Roger Oriol
54579df4b3 use hermes stock image for platform engineer 2026-06-27 20:40:42 +02:00
Roger Oriol
3f3467cb13 gitea registry ingress 2026-06-27 11:46:53 +02:00
3 changed files with 99 additions and 23 deletions

35
platform-engineer/build-and-push.sh Normal file → Executable file
View File

@@ -1,24 +1,43 @@
#!/usr/bin/env bash
# Build & push the derived Hermes image (kubectl + helm) to the Gitea registry.
# Build & push the derived Hermes image (kubectl + helm).
#
# Run this on a machine with docker + access to git.rogi.casa:
# ./platform-engineer/build-and-push.sh
# Two modes:
# ./build-and-push.sh push # build + push to the Gitea registry
# ./build-and-push.sh local # build + import directly into the NUC's k3s containerd
# # (no registry needed; pod is pinned to this node)
#
# Prereqs:
# - docker login git.rogi.casa (use your Gitea username + access token)
# Default (no arg): push.
set -euo pipefail
REGISTRY="git.rogi.casa"
# Docker registry pushes can't go through the Cloudflare proxy (100 MB cap),
# so push to the DNS-only registry hostname instead of git.rogi.casa.
# Override with: REGISTRY=git.rogi.casa ./build-and-push.sh push
# (only if git.rogi.casa is itself DNS-only, i.e. "grey-clouded" in Cloudflare)
REGISTRY="${REGISTRY:-registry.rogi.casa}"
REPO="roger/hermes-agent"
TAG="${TAG:-v1.35-1}"
IMAGE="${REGISTRY}/${REPO}:${TAG}"
MODE="${1:-push}"
cd "$(dirname "$0")"
echo "==> Building ${IMAGE}"
docker build --platform linux/amd64 -t "${IMAGE}" -f dockerfile .
case "$MODE" in
push)
echo "==> Pushing ${IMAGE}"
docker push "${IMAGE}"
echo "==> Done. Update platform-engineer/deployment.yaml image: if you changed TAG."
echo "==> Done. If the pod can't pull, create the gitea-registry secret in the namespace."
;;
local)
# Requires k3s + being run on the node the pod schedules to (roger-nucbox-evo-x2).
echo "==> Importing into k3s containerd (requires sudo)"
docker save "${IMAGE}" | sudo k3s ctr images import -
echo "==> Done. Verify: sudo k3s ctr images ls | grep hermes-agent"
echo " deployment.yaml is set to imagePullPolicy: IfNotPresent"
;;
*)
echo "Usage: $0 {push|local}" >&2
exit 1
;;
esac

View File

@@ -24,18 +24,37 @@ spec:
restartPolicy: OnFailure
containers:
- name: seed
image: bitnami/kubectl:1.35
# alpine is tiny and always available; we install curl + download the
# right-arch kubectl binary at runtime (bitnami/kubectl tags are
# inconsistent across versions, so we avoid depending on them).
image: alpine:3.20
command: ["sh", "-c"]
args:
- |
set -e
# Install curl, then download kubectl for this node's architecture.
apk add --no-cache curl
ARCH=$(uname -m)
case "$ARCH" in
x86_64) KARCH=amd64 ;;
aarch64) KARCH=arm64 ;;
armv7l) KARCH=arm ;;
*) echo "unsupported arch: $ARCH" >&2; exit 1 ;;
esac
echo "Downloading kubectl for linux/$KARCH ..."
curl -fsSL -o /usr/local/bin/kubectl \
"https://dl.k8s.io/release/v1.35.0/bin/linux/${KARCH}/kubectl"
chmod +x /usr/local/bin/kubectl
kubectl version --client
echo "Waiting for hermes pod to be Ready..."
kubectl -n platform-engineer wait --for=condition=Ready pod -l app=hermes --timeout=300s || true
POD=$(kubectl -n platform-engineer get pod -l app=hermes -o jsonpath='{.items[0].metadata.name}')
echo "Using pod: $POD"
exists() { kubectl -n platform-engineer exec "$POD" -- hermes cron list 2>/dev/null | grep -qi "name=$1\| $1 "; }
exists() { kubectl -n platform-engineer exec "$POD" -- hermes cron list 2>/dev/null | grep -qi " $1 "; }
create() {
name="$1"; schedule="$2"; deliver="$3"; prompt="$4"

View File

@@ -18,8 +18,7 @@ spec:
app: hermes
spec:
serviceAccountName: platform-engineer
imagePullSecrets:
- name: gitea-registry
# No imagePullSecrets — using the public stock Hermes image from Docker Hub.
# Pin to the powerful amd64 node (image is linux/amd64; the NUC has 24 GiB).
nodeSelector:
@@ -43,6 +42,28 @@ spec:
topologyKey: kubernetes.io/hostname
initContainers:
# Download kubectl + helm into a shared emptyDir so the stock Hermes image
# (which doesn't ship kubectl) can still drive the cluster. Avoids building
# and pushing a custom image through a slow / size-capped registry.
- name: install-tools
image: curlimages/curl:8.12.1
command: ["sh", "-c"]
args:
- |
set -e
echo "Downloading kubectl v1.35.0..."
curl -fsSL -o /tools/kubectl \
https://dl.k8s.io/release/v1.35.0/bin/linux/amd64/kubectl
chmod +x /tools/kubectl
echo "Downloading helm v3.16.3..."
curl -fsSL https://get.helm.sh/helm-v3.16.3-linux-amd64.tar.gz \
| tar -xz -C /tools --strip-components=1 linux-amd64/helm
chmod +x /tools/helm
echo "Tools installed:"; ls -la /tools
volumeMounts:
- name: tools
mountPath: /tools
# Seed /opt/data with config.yaml + SOUL.md on first boot only.
# ArgoCD owns the manifests; the PVC is runtime state and is NOT reconciled.
- name: seed-data
@@ -68,9 +89,13 @@ spec:
containers:
- name: hermes
image: git.rogi.casa/roger/hermes-agent:v1.35-1
image: nousresearch/hermes-agent:latest
imagePullPolicy: Always
command: ["gateway", "run"]
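# IMPORTANT: do NOT set `command:` — it would override the image's
# ENTRYPOINT (/init, s6-overlay), which sets up the hermes user, seeds
# config on first boot, and supervises the gateway. Only `args:` is set
# below; in Kubernetes `args:` replaces the image's CMD and is passed to
# the ENTRYPOINT.
# NOTE(review): the image's CMD is reported to be main-wrapper.sh, which
# routes `gateway run` through s6 — confirm that overriding CMD with
# `args:` still goes through that wrapper.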
args: ["gateway", "run"]
ports:
- name: gateway
containerPort: 8642
@@ -80,14 +105,21 @@ spec:
- secretRef:
name: hermes-env
env:
# k3s injects these automatically; kubectl inside the pod uses the SA token.
# Kubernetes (k3s here) sets KUBERNETES_SERVICE_HOST/PORT and mounts the
# ServiceAccount token automatically, so kubectl inside the pod
# authenticates as the platform-engineer SA.
- name: HERMES_HOME
value: /opt/data
# Put the initContainer-installed kubectl/helm on PATH for the hermes user.
- name: PATH
value: /opt/hermes/bin:/opt/hermes/.venv/bin:/tools:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
volumeMounts:
- name: data
mountPath: /opt/data
- name: workspace
mountPath: /workspace
- name: tools
mountPath: /tools
readOnly: true
resources:
requests:
memory: "512Mi"
@@ -96,15 +128,19 @@ spec:
memory: "2Gi"
cpu: "1000m"
livenessProbe:
httpGet:
path: /health
port: 8642
initialDelaySeconds: 60
# Probe the dashboard port (9119, always enabled via HERMES_DASHBOARD=1
# and binds 0.0.0.0). The gateway API on 8642 is off by default
# (API_SERVER_ENABLED not set), so 9119 is the reliable liveness signal.
# s6 auto-restarts the gateway itself; this probe only catches a wedged
# container.
tcpSocket:
port: 9119
initialDelaySeconds: 90
periodSeconds: 30
failureThreshold: 3
timeoutSeconds: 5
failureThreshold: 5
securityContext:
allowPrivilegeEscalation: false
runAsNonRoot: false # official image runs as root for s6 init then drops to hermes
volumes:
- name: data
@@ -112,6 +148,8 @@ spec:
claimName: hermes-data
- name: workspace
emptyDir: {}
- name: tools
emptyDir: {}
- name: seed
configMap:
name: hermes-seed