name: nightly

# Builds and deploys the staging environment from main every night.
# Runs on the self-hosted runner using Docker-out-of-Docker (the docker
# socket is mounted in), so `docker compose build` produces images on
# the host daemon and `docker compose up` consumes them directly — no
# registry hop.
#
# Operational assumptions (see docs/DEPLOYMENT.md §3 for the full setup):
#
# 1. Single-tenant self-hosted runner. The "Write staging env file" step
#    writes every secret to .env.staging on the runner filesystem; the
#    `if: always()` cleanup step removes it. A multi-tenant runner
#    would need to switch to docker compose --env-file <(stdin) instead.
#
# 2. Host docker layer cache is authoritative. There is no
#    actions/cache; we rely on the host daemon to keep Maven and npm
#    layers warm between runs. A `docker system prune` on the host
#    will cause the next nightly build to be cold (5–10 min slower).
#
# Staging environment isolation:
#   - project name: archiv-staging
#   - host ports: backend 8081, frontend 3001
#   - profile: staging (starts mailpit instead of a real SMTP relay)
#
# Required Gitea secrets:
#   STAGING_POSTGRES_PASSWORD
#   STAGING_MINIO_PASSWORD
#   STAGING_MINIO_APP_PASSWORD
#   STAGING_OCR_TRAINING_TOKEN
#   STAGING_APP_ADMIN_USERNAME
#   STAGING_APP_ADMIN_PASSWORD

on:
  schedule:
    - cron: "0 2 * * *"
  workflow_dispatch:

env:
  # Ensures the backend Dockerfile's `RUN --mount=type=cache` lines are
  # honoured (Maven cache survives between runs).
  DOCKER_BUILDKIT: "1"

jobs:
  deploy-staging:
    # `ubuntu-latest` matches our self-hosted runner's advertised label
    # (the runner has labels: ubuntu-latest / ubuntu-24.04 / ubuntu-22.04).
    # `self-hosted` would never match — no runner advertises it — so the
    # job parks in the queue forever. ADR-011's "single-tenant" promise
    # is at the repo level; sharing this runner between CI and deploys
    # for the same repo is within that boundary.
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Write staging env file
        # NOTE(review): this script looks truncated in the copy under
        # review. The file header says this step writes every secret to
        # .env.staging, yet no secret assignments are visible here and
        # the `cat` reads /tmp/compose-rendered.yml, which no visible
        # command creates. Presumably a heredoc body and the header of a
        # separate compose-config check step were lost. The tokens below
        # are kept exactly as found; restore the missing text from
        # version control before relying on this workflow.
        run: |
          cat > .env.staging < /tmp/compose-rendered.yml
          grep -q '^[[:space:]]*target: /import$' /tmp/compose-rendered.yml \
            || { echo "::error::backend is missing the /import bind mount (see #526)"; exit 1; }
          grep -A2 '^[[:space:]]*target: /import$' /tmp/compose-rendered.yml \
            | grep -q 'read_only: true' \
            || { echo "::error::backend /import mount is not read-only (see #526)"; exit 1; }

      - name: Build images
        # `--pull` forces re-fetching pinned base images so a CVE
        # re-publication of the same tag (e.g. node:20.19.0-alpine3.21,
        # postgres:16-alpine) is picked up instead of being served
        # from the host's stale Docker layer cache.
        run: |
          docker compose \
            -f docker-compose.prod.yml \
            -p archiv-staging \
            --env-file .env.staging \
            --profile staging \
            build --pull

      - name: Deploy staging
        # `--wait` blocks until the services report running/healthy;
        # `--remove-orphans` drops containers for services that are no
        # longer defined in the compose file.
        run: |
          docker compose \
            -f docker-compose.prod.yml \
            -p archiv-staging \
            --env-file .env.staging \
            --profile staging \
            up -d --wait --remove-orphans

      - name: Reload Caddy
        # Apply any committed Caddyfile changes before smoke-testing the
        # public surface. Without this step, a Caddyfile edit lands in the
        # repo but Caddy keeps serving the previous config until someone
        # reloads it manually — the smoke test would then catch a stale
        # header or a still-proxied /actuator route rather than confirming
        # the current config is live.
        #
        # The runner executes job steps inside Docker containers (DooD).
        # `systemctl` is not present in Ubuntu container images and cannot
        # reach the host's systemd directly. We use the Docker socket
        # (mounted into every job container via runner-config.yaml) to spin
        # up a privileged sibling container in the host PID namespace;
        # nsenter then enters the host's namespaces so systemctl talks to
        # the real host systemd daemon. No sudoers entry is required — the
        # Docker socket already grants root-equivalent host access.
        #
        # `systemctl reload caddy` sends SIGHUP; Caddy re-reads
        # /etc/caddy/Caddyfile (symlinked to infra/caddy/Caddyfile) without
        # dropping connections. If Caddy is not running this step fails fast
        # before the smoke test issues a misleading "port 443 refused" error.
        run: |
          docker run --rm --privileged --pid=host \
            ubuntu:22.04 \
            nsenter -t 1 -m -u -n -p -i -- /bin/systemctl reload caddy

      - name: Smoke test deployed environment
        # Healthchecks confirm containers are healthy; they do NOT confirm the
        # public surface works. This step catches: Caddy not reloaded, HSTS
        # header dropped, /actuator block bypassed.
        #
        # --resolve pins staging.raddatz.cloud to the runner's loopback so we
        # do NOT depend on the host router doing hairpin NAT (many SOHO
        # routers do not, or do so only after a firmware update). SNI still
        # uses the public hostname so the cert validates correctly.
        run: |
          set -e
          HOST="staging.raddatz.cloud"
          URL="https://$HOST"
          # Left unquoted at the call sites on purpose: it must word-split
          # into the flag and its argument.
          RESOLVE="--resolve $HOST:443:127.0.0.1"
          echo "Smoke test: $URL (pinned to 127.0.0.1)"
          curl -fsS $RESOLVE --max-time 10 "$URL/login" -o /dev/null
          # Pin the preload-list-eligible HSTS value, not just header presence:
          # a degraded `max-age=1` or a dropped `includeSubDomains; preload` must
          # fail this check rather than pass it silently.
          curl -fsS $RESOLVE --max-time 10 -I "$URL/" \
            | grep -Eqi 'strict-transport-security:[[:space:]]*max-age=31536000.*includeSubDomains.*preload'
          # Permissions-Policy denies APIs the app does not use (camera,
          # microphone, geolocation). A regression that loosens or drops the
          # header now fails the smoke step.
          curl -fsS $RESOLVE --max-time 10 -I "$URL/" \
            | grep -Eqi 'permissions-policy:[[:space:]]*camera=\(\),[[:space:]]*microphone=\(\),[[:space:]]*geolocation=\(\)'
          # `|| true`: under `set -e` a transport failure (refused connection,
          # timeout) would otherwise abort the step here with no output,
          # because `-s` silences curl. curl still prints 000 via -w in that
          # case, so the check below fails with an explicit message instead.
          status=$(curl -s $RESOLVE -o /dev/null -w "%{http_code}" --max-time 10 "$URL/actuator/health" || true)
          [ "$status" = "404" ] || { echo "expected 404 from /actuator/health, got $status"; exit 1; }
          echo "All smoke checks passed"

      - name: Cleanup env file
        # LOAD-BEARING: `if: always()` is the linchpin of the ADR-011
        # single-tenant runner trust model. Every secret in .env.staging
        # is plain text on the runner filesystem until this step runs.
        # If a future refactor drops `if: always()`, a failed deploy
        # leaves the env-file behind. Do not remove this conditional
        # without first re-evaluating ADR-011.
        #
        # /tmp/compose-rendered.yml is removed as well.
        # NOTE(review): presumably that file is `docker compose config`
        # output, which would carry the same secrets interpolated in
        # plain text — confirm. `rm -f` is a no-op if it does not exist.
        if: always()
        run: rm -f .env.staging /tmp/compose-rendered.yml