# familienarchiv/.gitea/workflows/nightly.yml

name: nightly

# Builds and deploys the staging environment from main every night.
# Runs on the self-hosted runner using Docker-out-of-Docker (the docker
# socket is mounted in), so `docker compose build` produces images on
# the host daemon and `docker compose up` consumes them directly — no
# registry hop.
#
# Operational assumptions (see docs/DEPLOYMENT.md §3 for the full setup):
#
#   1. Single-tenant self-hosted runner. The "Write staging env file" step
#      writes every secret to .env.staging on the runner filesystem; the
#      `if: always()` cleanup step removes it. A multi-tenant runner
#      would need to feed the env file to compose without writing it to
#      disk instead, e.g. via shell process substitution:
#      `docker compose --env-file <(...)`.
#
#   2. Host docker layer cache is authoritative. There is no
#      actions/cache; we rely on the host daemon to keep Maven and npm
#      layers warm between runs. A `docker system prune` on the host
#      will cause the next nightly build to be cold (5–10 min slower).
#
# Staging environment isolation:
#   - project name: archiv-staging
#   - host ports:   backend 8081, frontend 3001
#   - profile:      staging (starts mailpit instead of a real SMTP relay)
#
# Required Gitea secrets:
#   STAGING_POSTGRES_PASSWORD
#   STAGING_MINIO_PASSWORD
#   STAGING_MINIO_APP_PASSWORD
#   STAGING_OCR_TRAINING_TOKEN
#   STAGING_APP_ADMIN_USERNAME
#   STAGING_APP_ADMIN_PASSWORD

on:
  # Scheduled run: minute 0 of hour 2, every day.
  # NOTE(review): the time zone the scheduler applies is not visible here;
  # confirm against the server configuration.
  schedule:
    - cron: '0 2 * * *'
  # Also allow the workflow to be started by hand.
  workflow_dispatch:

env:
  # Enable BuildKit so that the `RUN --mount=type=cache` instructions in
  # the backend Dockerfile take effect and the Maven cache is reused from
  # one run to the next.
  DOCKER_BUILDKIT: '1'

jobs:
  # Single job: write the env file, build, deploy, smoke-test, clean up.
  deploy-staging:
    # Any runner carrying the `self-hosted` label picks this job up.
    runs-on: self-hosted
    steps:
      # Fetch the repository so docker-compose.prod.yml and the build
      # contexts are available in the workspace.
      - uses: actions/checkout@v4

      - name: Write staging env file
        # Renders .env.staging, the env file passed to every
        # `docker compose --env-file .env.staging` call later in this job.
        #
        # Secrets reach the shell through `env:` and are expanded as
        # ${VAR} inside the heredoc instead of being pasted into the script
        # text by the `${{ }}` templating. The result of a parameter
        # expansion in a heredoc is not scanned again, so a secret that
        # contains `$`, a backtick, a backslash or even a line reading
        # `EOF` is written out verbatim rather than being interpreted by
        # the shell.
        env:
          STAGING_POSTGRES_PASSWORD: ${{ secrets.STAGING_POSTGRES_PASSWORD }}
          STAGING_MINIO_PASSWORD: ${{ secrets.STAGING_MINIO_PASSWORD }}
          STAGING_MINIO_APP_PASSWORD: ${{ secrets.STAGING_MINIO_APP_PASSWORD }}
          STAGING_OCR_TRAINING_TOKEN: ${{ secrets.STAGING_OCR_TRAINING_TOKEN }}
          STAGING_APP_ADMIN_USERNAME: ${{ secrets.STAGING_APP_ADMIN_USERNAME }}
          STAGING_APP_ADMIN_PASSWORD: ${{ secrets.STAGING_APP_ADMIN_PASSWORD }}
        run: |
          # The file holds every staging secret: drop any stale copy and
          # create the new one readable by the owner only.
          rm -f .env.staging
          umask 077
          cat > .env.staging <<EOF
          TAG=nightly
          PORT_BACKEND=8081
          PORT_FRONTEND=3001
          APP_DOMAIN=staging.raddatz.cloud
          POSTGRES_PASSWORD=${STAGING_POSTGRES_PASSWORD}
          MINIO_PASSWORD=${STAGING_MINIO_PASSWORD}
          MINIO_APP_PASSWORD=${STAGING_MINIO_APP_PASSWORD}
          OCR_TRAINING_TOKEN=${STAGING_OCR_TRAINING_TOKEN}
          APP_ADMIN_USERNAME=${STAGING_APP_ADMIN_USERNAME}
          APP_ADMIN_PASSWORD=${STAGING_APP_ADMIN_PASSWORD}
          MAIL_HOST=mailpit
          MAIL_PORT=1025
          MAIL_USERNAME=
          MAIL_PASSWORD=
          MAIL_SMTP_AUTH=false
          MAIL_STARTTLS_ENABLE=false
          APP_MAIL_FROM=noreply@staging.raddatz.cloud
          EOF

      - name: Build images
        # Builds the images for the `staging` profile of
        # docker-compose.prod.yml under the project name archiv-staging.
        #
        # `--pull` makes the build re-fetch base images even when the same
        # tag is already present on the host, so a re-publication of a
        # pinned tag (e.g. a CVE fix pushed to node:20.19.0-alpine3.21) is
        # picked up instead of the stale local copy.
        # NOTE(review): this covers images used as build bases; confirm
        # whether image-only services such as postgres:16-alpine are
        # refreshed as well.
        #
        # Folded scalar: the lines below are joined into a single command.
        run: >-
          docker compose
          -f docker-compose.prod.yml
          -p archiv-staging
          --env-file .env.staging
          --profile staging
          build --pull

      - name: Deploy staging
        # Starts (or updates) the archiv-staging project in the background
        # with the same compose file, env file and profile as the build.
        # `--wait` blocks until the services report running/healthy, and
        # `--remove-orphans` drops containers of services that are no
        # longer defined in the compose file.
        #
        # Folded scalar: the lines below are joined into a single command.
        run: >-
          docker compose
          -f docker-compose.prod.yml
          -p archiv-staging
          --env-file .env.staging
          --profile staging
          up -d --wait --remove-orphans

      - name: Smoke test deployed environment
        # Healthchecks confirm containers are healthy; they do NOT confirm the
        # public surface works. This step catches: Caddy not reloaded, HSTS
        # header dropped, /actuator block bypassed.
        #
        # --resolve pins staging.raddatz.cloud to the runner's loopback so we
        # do NOT depend on the host router doing hairpin NAT (many SOHO
        # routers do not, or do so only after a firmware update). SNI still
        # uses the public hostname so the cert validates correctly.
        #
        # Every check reports which assertion failed before exiting, so a
        # red run can be diagnosed from the log alone.
        run: |
          set -e
          HOST="staging.raddatz.cloud"
          URL="https://$HOST"
          # Expanded unquoted below on purpose: it must split into the
          # option and its argument.
          RESOLVE="--resolve $HOST:443:127.0.0.1"
          fail() { echo "Smoke test failed: $*"; exit 1; }
          echo "Smoke test: $URL (pinned to 127.0.0.1)"

          # 1. The login page is served with a success status.
          curl -fsS $RESOLVE --max-time 10 "$URL/login" -o /dev/null \
            || fail "GET $URL/login did not succeed"

          # 2. HSTS header is present. The headers are captured first and
          #    grep reads its whole input (no -q), so curl is never cut off
          #    mid-write and a `pipefail` shell cannot turn a successful
          #    match into a spurious failure.
          headers=$(curl -fsS $RESOLVE --max-time 10 -I "$URL/") \
            || fail "HEAD $URL/ did not succeed"
          printf '%s\n' "$headers" | grep -i 'strict-transport-security' >/dev/null \
            || fail "Strict-Transport-Security header missing on $URL/"

          # 3. /actuator must not be reachable from outside. `|| true` keeps
          #    `set -e` from aborting on a transport error, so the message
          #    below is still printed (curl reports the status as 000 then).
          status=$(curl -s $RESOLVE -o /dev/null -w "%{http_code}" --max-time 10 "$URL/actuator/health") || true
          [ "$status" = "404" ] || { echo "expected 404 from /actuator/health, got $status"; exit 1; }
          echo "All smoke checks passed"

      - name: Cleanup env file
        # `always()` makes this step run even when an earlier step failed,
        # so the secrets written to .env.staging do not stay on the runner.
        # `-f` keeps the step green when the file was never created.
        if: always()
        run: |
          rm -f .env.staging