From 79735e23e08594f50456533b8f646736f028346d Mon Sep 17 00:00:00 2001
From: Marcel
Date: Sat, 16 May 2026 00:01:48 +0200
Subject: [PATCH] ci(obs): assert obs-loki/prometheus/grafana/tempo are healthy after stack up

Co-Authored-By: Claude Sonnet 4.6
---
Notes (review; ignored by git am):
  - The health probe differs from the commit named above: the inspect template
    is guarded against containers without a healthcheck, and docker's stderr is
    no longer discarded.
  - Leading whitespace of the YAML lines was reconstructed; verify it against
    .gitea/workflows/nightly.yml before applying.

 .gitea/workflows/nightly.yml | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/.gitea/workflows/nightly.yml b/.gitea/workflows/nightly.yml
index 97643ee3..d4af264d 100644
--- a/.gitea/workflows/nightly.yml
+++ b/.gitea/workflows/nightly.yml
@@ -162,6 +162,30 @@ jobs:
             -f /opt/familienarchiv/docker-compose.observability.yml \
             up -d --wait --remove-orphans
 
+      - name: Assert observability stack health
+        # docker compose up --wait covers services WITH healthcheck directives only.
+        # obs-promtail, obs-cadvisor, obs-node-exporter, and obs-glitchtip-worker have
+        # no healthcheck — they are considered "started" as soon as the process runs.
+        # This step explicitly asserts the four healthchecked critical services are
+        # healthy before the smoke test proceeds.
+        run: |
+          set -e
+          unhealthy=""
+          for svc in obs-loki obs-prometheus obs-grafana obs-tempo; do
+            # The template guards .State.Health so a container that exists but has
+            # no healthcheck reports "no-healthcheck" instead of failing the template.
+            # stderr is left visible so the job log shows why an inspect failed.
+            status=$(docker container inspect \
+              --format '{{if .State.Health}}{{.State.Health.Status}}{{else}}no-healthcheck{{end}}' \
+              "$svc") || status="missing"
+            if [ "$status" != "healthy" ]; then
+              echo "::error::$svc is not healthy (status: $status)"
+              unhealthy="$unhealthy $svc"
+            fi
+          done
+          [ -z "$unhealthy" ] || exit 1
+          echo "All critical observability services are healthy"
+
       - name: Reload Caddy
         # Apply any committed Caddyfile changes before smoke-testing the
         # public surface. Without this step, a Caddyfile edit lands in the