Compare commits

...

4 Commits

Author SHA1 Message Date
Roger Oriol
734962d198 fix hermes liveness probe 2026-06-28 00:43:09 +02:00
Roger Oriol
4d9195b32d use hermes stock image for platform engineer 2026-06-27 21:00:03 +02:00
Roger Oriol
54579df4b3 use hermes stock image for platform engineer 2026-06-27 20:40:42 +02:00
Roger Oriol
3f3467cb13 gitea registry ingress 2026-06-27 11:46:53 +02:00
3 changed files with 99 additions and 23 deletions

35
platform-engineer/build-and-push.sh Normal file → Executable file
View File

@@ -1,24 +1,43 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# Build & push the derived Hermes image (kubectl + helm) to the Gitea registry. # Build & push the derived Hermes image (kubectl + helm).
# #
# Run this on a machine with docker + access to git.rogi.casa: # Two modes:
# ./platform-engineer/build-and-push.sh # ./build-and-push.sh push # build + push to the Gitea registry
# ./build-and-push.sh local # build + import directly into the NUC's k3s containerd
# # (no registry needed; pod is pinned to this node)
# #
# Prereqs: # Default (no arg): push.
# - docker login git.rogi.casa (use your Gitea username + access token)
set -euo pipefail set -euo pipefail
REGISTRY="git.rogi.casa" # Docker registry pushes can't go through the Cloudflare proxy (100 MB cap),
# so push to the DNS-only registry hostname instead of git.rogi.casa.
# Override with: REGISTRY=git.rogi.casa ./build-and-push.sh push (if grey-clouded)
REGISTRY="${REGISTRY:-registry.rogi.casa}"
REPO="roger/hermes-agent" REPO="roger/hermes-agent"
TAG="${TAG:-v1.35-1}" TAG="${TAG:-v1.35-1}"
IMAGE="${REGISTRY}/${REPO}:${TAG}" IMAGE="${REGISTRY}/${REPO}:${TAG}"
MODE="${1:-push}"
cd "$(dirname "$0")" cd "$(dirname "$0")"
echo "==> Building ${IMAGE}" echo "==> Building ${IMAGE}"
docker build --platform linux/amd64 -t "${IMAGE}" -f dockerfile . docker build --platform linux/amd64 -t "${IMAGE}" -f dockerfile .
case "$MODE" in
push)
echo "==> Pushing ${IMAGE}" echo "==> Pushing ${IMAGE}"
docker push "${IMAGE}" docker push "${IMAGE}"
echo "==> Done. If the pod can't pull, create the gitea-registry secret in the namespace."
echo "==> Done. Update platform-engineer/deployment.yaml image: if you changed TAG." ;;
local)
# Requires k3s + being run on the node the pod schedules to (roger-nucbox-evo-x2).
echo "==> Importing into k3s containerd (requires sudo)"
docker save "${IMAGE}" | sudo k3s ctr images import -
echo "==> Done. Verify: sudo k3s ctr images ls | grep hermes-agent"
echo " deployment.yaml is set to imagePullPolicy: IfNotPresent"
;;
*)
echo "Usage: $0 {push|local}" >&2
exit 1
;;
esac

View File

@@ -24,18 +24,37 @@ spec:
restartPolicy: OnFailure restartPolicy: OnFailure
containers: containers:
- name: seed - name: seed
image: bitnami/kubectl:1.35 # alpine is tiny and always available; we install curl + download the
# right-arch kubectl binary at runtime (bitnami/kubectl tags are
# inconsistent across versions, so we avoid depending on them).
image: alpine:3.20
command: ["sh", "-c"] command: ["sh", "-c"]
args: args:
- | - |
set -e set -e
# Install curl, then download kubectl for this node's architecture.
apk add --no-cache curl
ARCH=$(uname -m)
case "$ARCH" in
x86_64) KARCH=amd64 ;;
aarch64) KARCH=arm64 ;;
armv7l) KARCH=arm ;;
*) echo "unsupported arch: $ARCH" >&2; exit 1 ;;
esac
echo "Downloading kubectl for linux/$KARCH ..."
curl -fsSL -o /usr/local/bin/kubectl \
"https://dl.k8s.io/release/v1.35.0/bin/linux/${KARCH}/kubectl"
chmod +x /usr/local/bin/kubectl
kubectl version --client
echo "Waiting for hermes pod to be Ready..." echo "Waiting for hermes pod to be Ready..."
kubectl -n platform-engineer wait --for=condition=Ready pod -l app=hermes --timeout=300s || true kubectl -n platform-engineer wait --for=condition=Ready pod -l app=hermes --timeout=300s || true
POD=$(kubectl -n platform-engineer get pod -l app=hermes -o jsonpath='{.items[0].metadata.name}') POD=$(kubectl -n platform-engineer get pod -l app=hermes -o jsonpath='{.items[0].metadata.name}')
echo "Using pod: $POD" echo "Using pod: $POD"
exists() { kubectl -n platform-engineer exec "$POD" -- hermes cron list 2>/dev/null | grep -qi "name=$1\| $1 "; } exists() { kubectl -n platform-engineer exec "$POD" -- hermes cron list 2>/dev/null | grep -qi " $1 "; }
create() { create() {
name="$1"; schedule="$2"; deliver="$3"; prompt="$4" name="$1"; schedule="$2"; deliver="$3"; prompt="$4"

View File

@@ -18,8 +18,7 @@ spec:
app: hermes app: hermes
spec: spec:
serviceAccountName: platform-engineer serviceAccountName: platform-engineer
imagePullSecrets: # No imagePullSecrets — using the public stock Hermes image from Docker Hub.
- name: gitea-registry
# Pin to the powerful amd64 node (image is linux/amd64; the NUC has 24 GiB). # Pin to the powerful amd64 node (image is linux/amd64; the NUC has 24 GiB).
nodeSelector: nodeSelector:
@@ -43,6 +42,28 @@ spec:
topologyKey: kubernetes.io/hostname topologyKey: kubernetes.io/hostname
initContainers: initContainers:
# Download kubectl + helm into a shared emptyDir so the stock Hermes image
# (which doesn't ship kubectl) can still drive the cluster. Avoids building
# and pushing a custom image through a slow / size-capped registry.
- name: install-tools
image: curlimages/curl:8.12.1
command: ["sh", "-c"]
args:
- |
set -e
echo "Downloading kubectl v1.35.0..."
curl -fsSL -o /tools/kubectl \
https://dl.k8s.io/release/v1.35.0/bin/linux/amd64/kubectl
chmod +x /tools/kubectl
echo "Downloading helm v3.16.3..."
curl -fsSL https://get.helm.sh/helm-v3.16.3-linux-amd64.tar.gz \
| tar -xz -C /tools --strip-components=1 linux-amd64/helm
chmod +x /tools/helm
echo "Tools installed:"; ls -la /tools
volumeMounts:
- name: tools
mountPath: /tools
# Seed /opt/data with config.yaml + SOUL.md on first boot only. # Seed /opt/data with config.yaml + SOUL.md on first boot only.
# ArgoCD owns the manifests; the PVC is runtime state and is NOT reconciled. # ArgoCD owns the manifests; the PVC is runtime state and is NOT reconciled.
- name: seed-data - name: seed-data
@@ -68,9 +89,13 @@ spec:
containers: containers:
- name: hermes - name: hermes
image: git.rogi.casa/roger/hermes-agent:v1.35-1 image: nousresearch/hermes-agent:latest
imagePullPolicy: Always imagePullPolicy: Always
command: ["gateway", "run"] # IMPORTANT: do NOT set `command:` — it would override the image's
# ENTRYPOINT (/init, s6-overlay), which sets up the hermes user, seeds
# config on first boot, and supervises the gateway. The image's CMD
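# (main-wrapper.sh) already routes `gateway run` through s6. NOTE(review): in Kubernetes, `args:` replaces the image's CMD, so main-wrapper.sh only runs if the ENTRYPOINT invokes it — confirm against the image.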
args: ["gateway", "run"]
ports: ports:
- name: gateway - name: gateway
containerPort: 8642 containerPort: 8642
@@ -80,14 +105,21 @@ spec:
- secretRef: - secretRef:
name: hermes-env name: hermes-env
env: env:
# k3s injects these automatically; kubectl inside the pod uses the SA token. # k3s injects KUBERNETES_SERVICE_HOST/PORT + the SA token automatically;
# kubectl inside the pod authenticates as the platform-engineer SA.
- name: HERMES_HOME - name: HERMES_HOME
value: /opt/data value: /opt/data
# Put the initContainer-installed kubectl/helm on PATH for the hermes user.
- name: PATH
value: /opt/hermes/bin:/opt/hermes/.venv/bin:/tools:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
volumeMounts: volumeMounts:
- name: data - name: data
mountPath: /opt/data mountPath: /opt/data
- name: workspace - name: workspace
mountPath: /workspace mountPath: /workspace
- name: tools
mountPath: /tools
readOnly: true
resources: resources:
requests: requests:
memory: "512Mi" memory: "512Mi"
@@ -96,15 +128,19 @@ spec:
memory: "2Gi" memory: "2Gi"
cpu: "1000m" cpu: "1000m"
livenessProbe: livenessProbe:
httpGet: # Probe the dashboard port (9119, always enabled via HERMES_DASHBOARD=1
path: /health # and binds 0.0.0.0). The gateway API on 8642 is off by default
port: 8642 # (API_SERVER_ENABLED not set), so 9119 is the reliable liveness signal.
initialDelaySeconds: 60 # s6 auto-restarts the gateway itself; this probe only catches a wedged
# container.
tcpSocket:
port: 9119
initialDelaySeconds: 90
periodSeconds: 30 periodSeconds: 30
failureThreshold: 3 timeoutSeconds: 5
failureThreshold: 5
securityContext: securityContext:
allowPrivilegeEscalation: false allowPrivilegeEscalation: false
runAsNonRoot: false # official image runs as root for s6 init then drops to hermes
volumes: volumes:
- name: data - name: data
@@ -112,6 +148,8 @@ spec:
claimName: hermes-data claimName: hermes-data
- name: workspace - name: workspace
emptyDir: {} emptyDir: {}
- name: tools
emptyDir: {}
- name: seed - name: seed
configMap: configMap:
name: hermes-seed name: hermes-seed