ci(deploy): wire nightly.yml to composite deploy actions
Replaces the four inline obs steps with one uses: ./.gitea/actions/deploy-obs, and the Caddy reload + smoke test with one uses: each (host staging.raddatz.cloud, postgres_host archiv-staging-db-1, STAGING_* secrets). checkout@v4 stays the first step; the #526 /import mount guard stays inline. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -23,6 +23,11 @@ name: nightly
|
|||||||
# - host ports: backend 8081, frontend 3001
|
# - host ports: backend 8081, frontend 3001
|
||||||
# - profile: staging (starts mailpit instead of a real SMTP relay)
|
# - profile: staging (starts mailpit instead of a real SMTP relay)
|
||||||
#
|
#
|
||||||
|
# The obs-stack deploy, Caddy reload, and smoke test are shared with
|
||||||
|
# release.yml via the composite actions under .gitea/actions/ (ADR-029).
|
||||||
|
# actions/checkout MUST stay the first step: a local `uses: ./…` action
|
||||||
|
# only exists on disk after checkout.
|
||||||
|
#
|
||||||
# Required Gitea secrets:
|
# Required Gitea secrets:
|
||||||
# STAGING_POSTGRES_PASSWORD
|
# STAGING_POSTGRES_PASSWORD
|
||||||
# STAGING_MINIO_PASSWORD
|
# STAGING_MINIO_PASSWORD
|
||||||
@@ -55,6 +60,8 @@ jobs:
|
|||||||
# for the same repo is within that boundary.
|
# for the same repo is within that boundary.
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
|
# MUST be first: the composite actions below live under .gitea/actions/
|
||||||
|
# and only exist on disk once the repo is checked out (ADR-029).
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Write staging env file
|
- name: Write staging env file
|
||||||
@@ -92,6 +99,7 @@ jobs:
|
|||||||
# `compose config` renders both shorthand and longform mounts as
|
# `compose config` renders both shorthand and longform mounts as
|
||||||
# `target: /import` + `read_only: true`, so we assert against
|
# `target: /import` + `read_only: true`, so we assert against
|
||||||
# the rendered form rather than the raw source YAML.
|
# the rendered form rather than the raw source YAML.
|
||||||
|
# App-compose check (not obs), nightly-only — stays inline.
|
||||||
run: |
|
run: |
|
||||||
set -e
|
set -e
|
||||||
docker compose \
|
docker compose \
|
||||||
@@ -128,150 +136,21 @@ jobs:
|
|||||||
--profile staging \
|
--profile staging \
|
||||||
up -d --wait --remove-orphans
|
up -d --wait --remove-orphans
|
||||||
|
|
||||||
- name: Deploy observability configs
|
# POSTGRES_HOST is derived from the Compose project name (archiv-staging)
|
||||||
# Copies the compose file and config tree from the workspace checkout
|
|
||||||
# into /opt/familienarchiv/ — the permanent location that persists
|
|
||||||
# between CI runs. Containers started in the next step bind-mount
|
|
||||||
# from there, so a future workspace wipe cannot corrupt a running
|
|
||||||
# config file.
|
|
||||||
#
|
|
||||||
# obs-secrets.env is written fresh from Gitea secrets on every run so
|
|
||||||
# Gitea is always the single source of truth for secret rotation.
|
|
||||||
# Non-secret config lives in infra/observability/obs.env (tracked in git).
|
|
||||||
run: |
|
|
||||||
rm -rf /opt/familienarchiv/infra/observability
|
|
||||||
mkdir -p /opt/familienarchiv/infra/observability
|
|
||||||
cp -r infra/observability/. /opt/familienarchiv/infra/observability/
|
|
||||||
cp docker-compose.observability.yml /opt/familienarchiv/
|
|
||||||
cat > /opt/familienarchiv/obs-secrets.env <<'EOF'
|
|
||||||
GRAFANA_ADMIN_PASSWORD=${{ secrets.GRAFANA_ADMIN_PASSWORD }}
|
|
||||||
GRAFANA_DB_PASSWORD=${{ secrets.GRAFANA_DB_PASSWORD }}
|
|
||||||
GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }}
|
|
||||||
POSTGRES_PASSWORD=${{ secrets.STAGING_POSTGRES_PASSWORD }}
|
|
||||||
POSTGRES_HOST=archiv-staging-db-1
|
|
||||||
EOF
|
|
||||||
# Note: POSTGRES_HOST is derived from the Compose project name (archiv-staging)
|
|
||||||
# and service name (db). A project rename requires updating this value.
|
# and service name (db). A project rename requires updating this value.
|
||||||
chmod 600 /opt/familienarchiv/obs-secrets.env
|
- uses: ./.gitea/actions/deploy-obs
|
||||||
|
with:
|
||||||
|
grafana_admin_password: ${{ secrets.GRAFANA_ADMIN_PASSWORD }}
|
||||||
|
grafana_db_password: ${{ secrets.GRAFANA_DB_PASSWORD }}
|
||||||
|
glitchtip_secret_key: ${{ secrets.GLITCHTIP_SECRET_KEY }}
|
||||||
|
postgres_password: ${{ secrets.STAGING_POSTGRES_PASSWORD }}
|
||||||
|
postgres_host: archiv-staging-db-1
|
||||||
|
|
||||||
- name: Validate observability compose config
|
- uses: ./.gitea/actions/reload-caddy
|
||||||
# Dry-run: resolves all variable substitutions and reports any missing
|
|
||||||
# required keys before containers start. Catches undefined variables and
|
|
||||||
# YAML errors in config files updated by the previous step.
|
|
||||||
# --env-file order: obs.env first (git-tracked defaults), obs-secrets.env
|
|
||||||
# second (CI-written secrets). Later files win on duplicate keys, so
|
|
||||||
# obs-secrets.env overrides POSTGRES_HOST set in obs.env.
|
|
||||||
run: |
|
|
||||||
docker compose \
|
|
||||||
-f /opt/familienarchiv/docker-compose.observability.yml \
|
|
||||||
--env-file /opt/familienarchiv/infra/observability/obs.env \
|
|
||||||
--env-file /opt/familienarchiv/obs-secrets.env \
|
|
||||||
config --quiet
|
|
||||||
|
|
||||||
- name: Start observability stack
|
- uses: ./.gitea/actions/smoke-test
|
||||||
# Runs with absolute paths so bind mounts resolve to stable host paths
|
with:
|
||||||
# that survive workspace wipes between nightly runs (see ADR-016).
|
host: staging.raddatz.cloud
|
||||||
# Non-secret config from obs.env (git-tracked); secrets from obs-secrets.env
|
|
||||||
# (written fresh from Gitea secrets above). --env-file order: obs.env first,
|
|
||||||
# obs-secrets.env second — later file wins on duplicate keys.
|
|
||||||
run: |
|
|
||||||
docker compose \
|
|
||||||
-f /opt/familienarchiv/docker-compose.observability.yml \
|
|
||||||
--env-file /opt/familienarchiv/infra/observability/obs.env \
|
|
||||||
--env-file /opt/familienarchiv/obs-secrets.env \
|
|
||||||
up -d --wait --remove-orphans
|
|
||||||
|
|
||||||
- name: Assert observability stack health
|
|
||||||
# docker compose up --wait covers services WITH healthcheck directives only.
|
|
||||||
# obs-promtail, obs-cadvisor, obs-node-exporter, and obs-glitchtip-worker have
|
|
||||||
# no healthcheck — they are considered "started" as soon as the process runs.
|
|
||||||
# This step explicitly asserts the five healthchecked critical services are
|
|
||||||
# healthy before the smoke test proceeds.
|
|
||||||
run: |
|
|
||||||
set -e
|
|
||||||
unhealthy=""
|
|
||||||
for svc in obs-loki obs-prometheus obs-grafana obs-tempo obs-glitchtip; do
|
|
||||||
status=$(docker inspect "$svc" --format '{{.State.Health.Status}}' 2>/dev/null || echo "missing")
|
|
||||||
if [ "$status" != "healthy" ]; then
|
|
||||||
echo "::error::$svc is not healthy (status: $status)"
|
|
||||||
unhealthy="$unhealthy $svc"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
[ -z "$unhealthy" ] || exit 1
|
|
||||||
echo "All critical observability services are healthy"
|
|
||||||
|
|
||||||
- name: Reload Caddy
|
|
||||||
# Apply any committed Caddyfile changes before smoke-testing the
|
|
||||||
# public surface. Without this step, a Caddyfile edit lands in the
|
|
||||||
# repo but Caddy keeps serving the previous config until someone
|
|
||||||
# reloads it manually — the smoke test would then catch a stale
|
|
||||||
# header or a still-proxied /actuator route rather than confirming
|
|
||||||
# the current config is live.
|
|
||||||
#
|
|
||||||
# The runner executes job steps inside Docker containers (DooD).
|
|
||||||
# `systemctl` is not present in container images and cannot reach
|
|
||||||
# the host's systemd directly. We use the Docker socket (mounted
|
|
||||||
# into every job container via runner-config.yaml) to spin up a
|
|
||||||
# privileged sibling container in the host PID namespace; nsenter
|
|
||||||
# then enters the host's namespaces so systemctl talks to the real
|
|
||||||
# host systemd daemon. No sudoers entry is required — the Docker
|
|
||||||
# socket already grants root-equivalent host access.
|
|
||||||
#
|
|
||||||
# Alpine is used: ~5 MB vs ~70 MB for ubuntu, no unnecessary
|
|
||||||
# tooling, and the digest is pinned so any upstream change requires
|
|
||||||
# an explicit bump PR. util-linux (which ships nsenter) is installed
|
|
||||||
# at run time; apk add takes ~1 s on the warm VPS cache.
|
|
||||||
#
|
|
||||||
# `reload` not `restart`: reload sends SIGHUP so Caddy re-reads its
|
|
||||||
# config in-process without dropping TLS connections. `restart`
|
|
||||||
# would briefly stop the service, losing in-flight requests.
|
|
||||||
#
|
|
||||||
# If Caddy is not running this step fails fast before the smoke test
|
|
||||||
# issues a misleading "port 443 refused" error.
|
|
||||||
run: |
|
|
||||||
docker run --rm --privileged --pid=host \
|
|
||||||
alpine:3.21@sha256:48b0309ca019d89d40f670aa1bc06e426dc0931948452e8491e3d65087abc07d \
|
|
||||||
sh -c 'apk add --no-cache util-linux -q && nsenter -t 1 -m -u -n -p -i -- /bin/systemctl reload caddy'
|
|
||||||
|
|
||||||
- name: Smoke test deployed environment
|
|
||||||
# Healthchecks confirm containers are healthy; they do NOT confirm the
|
|
||||||
# public surface works. This step catches: Caddy not reloaded, HSTS
|
|
||||||
# header dropped, /actuator block bypassed.
|
|
||||||
#
|
|
||||||
# --resolve pins staging.raddatz.cloud to the Docker bridge gateway IP
|
|
||||||
# (the host) so we do NOT depend on hairpin NAT on the host router.
|
|
||||||
# 127.0.0.1 cannot be used: job containers run in bridge network mode
|
|
||||||
# (runner-config.yaml), so 127.0.0.1 is the container's loopback, not
|
|
||||||
# the host's. The bridge gateway IS the host; Caddy binds 0.0.0.0:443
|
|
||||||
# and is therefore reachable from the container via that IP.
|
|
||||||
# SNI still uses the public hostname so the TLS cert validates correctly.
|
|
||||||
#
|
|
||||||
# Gateway detection reads /proc/net/route (always present, no package
|
|
||||||
# required) instead of `ip route` to avoid a dependency on iproute2.
|
|
||||||
# Field $2=="00000000" is the default route; field $3 is the gateway as
|
|
||||||
# a little-endian 32-bit hex value which awk decodes to dotted-decimal.
|
|
||||||
run: |
|
|
||||||
set -e
|
|
||||||
HOST="staging.raddatz.cloud"
|
|
||||||
URL="https://$HOST"
|
|
||||||
HOST_IP=$(awk 'NR>1 && $2=="00000000"{h=$3;printf "%d.%d.%d.%d\n",strtonum("0x"substr(h,7,2)),strtonum("0x"substr(h,5,2)),strtonum("0x"substr(h,3,2)),strtonum("0x"substr(h,1,2));exit}' /proc/net/route)
|
|
||||||
[ -n "$HOST_IP" ] || { echo "ERROR: could not detect Docker bridge gateway via /proc/net/route"; exit 1; }
|
|
||||||
RESOLVE=(--resolve "$HOST:443:$HOST_IP")
|
|
||||||
echo "Smoke test: $URL (pinned to $HOST_IP via bridge gateway)"
|
|
||||||
curl -fsS "${RESOLVE[@]}" --max-time 10 "$URL/login" -o /dev/null
|
|
||||||
# Pin the preload-list-eligible HSTS value, not just header presence:
|
|
||||||
# a degraded `max-age=1` or a dropped `includeSubDomains; preload` must
|
|
||||||
# fail this check rather than pass it silently.
|
|
||||||
curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
|
|
||||||
| grep -Eqi 'strict-transport-security:[[:space:]]*max-age=31536000.*includeSubDomains.*preload'
|
|
||||||
# Permissions-Policy denies APIs the app does not use (camera,
|
|
||||||
# microphone, geolocation). A regression that loosens or drops the
|
|
||||||
# header now fails the smoke step.
|
|
||||||
curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
|
|
||||||
| grep -Eqi 'permissions-policy:[[:space:]]*camera=\(\),[[:space:]]*microphone=\(\),[[:space:]]*geolocation=\(\)'
|
|
||||||
status=$(curl -s "${RESOLVE[@]}" -o /dev/null -w "%{http_code}" --max-time 10 "$URL/actuator/health")
|
|
||||||
[ "$status" = "404" ] || { echo "expected 404 from /actuator/health, got $status"; exit 1; }
|
|
||||||
echo "All smoke checks passed"
|
|
||||||
|
|
||||||
- name: Cleanup env file
|
- name: Cleanup env file
|
||||||
# LOAD-BEARING: `if: always()` is the linchpin of the ADR-011
|
# LOAD-BEARING: `if: always()` is the linchpin of the ADR-011
|
||||||
|
|||||||
Reference in New Issue
Block a user