name: nightly

# Builds and deploys the staging environment from main every night.
# Runs on the self-hosted runner using Docker-out-of-Docker (the docker
# socket is mounted in), so `docker compose build` produces images on
# the host daemon and `docker compose up` consumes them directly — no
# registry hop.
#
# Operational assumptions (see docs/DEPLOYMENT.md §3 for the full setup):
#
# 1. Single-tenant self-hosted runner. The "Write staging env file" step
#    writes every secret to .env.staging on the runner filesystem; the
#    `if: always()` cleanup step removes it. A multi-tenant runner
#    would need to switch to docker compose --env-file <(stdin) instead.
#
# 2. Host docker layer cache is authoritative. There is no
#    actions/cache; we rely on the host daemon to keep Maven and npm
#    layers warm between runs. A `docker system prune` on the host
#    will cause the next nightly build to be cold (5–10 min slower).
#
# Staging environment isolation:
#   - project name: archiv-staging
#   - host ports:   backend 8081, frontend 3001
#   - profile:      staging (starts mailpit instead of a real SMTP relay)
#
# The obs-stack deploy, Caddy reload, and smoke test are shared with
# release.yml via the composite actions under .gitea/actions/ (ADR-029).
# actions/checkout MUST stay the first step: a local `uses: ./…` action
# only exists on disk after checkout.
#
# Required Gitea secrets:
#   STAGING_POSTGRES_PASSWORD
#   STAGING_MINIO_PASSWORD
#   STAGING_MINIO_APP_PASSWORD
#   STAGING_OCR_TRAINING_TOKEN
#   STAGING_APP_ADMIN_USERNAME
#   STAGING_APP_ADMIN_PASSWORD
#   GRAFANA_ADMIN_PASSWORD
#   GRAFANA_DB_PASSWORD   (read-only grafana_reader DB role, issue #651)
#   GLITCHTIP_SECRET_KEY
#   SENTRY_DSN            (set after GlitchTip first-run; empty = Sentry disabled)

on:
  schedule:
    - cron: "0 2 * * *"
  workflow_dispatch:

env:
  # Ensures the backend Dockerfile's `RUN --mount=type=cache` lines are
  # honoured (Maven cache survives between runs).
  DOCKER_BUILDKIT: "1"

jobs:
  deploy-staging:
    # `ubuntu-latest` matches our self-hosted runner's advertised label
    # (the runner has labels: ubuntu-latest / ubuntu-24.04 / ubuntu-22.04).
    # `self-hosted` would never match — no runner advertises it — so the
    # job parks in the queue forever. ADR-011's "single-tenant" promise
    # is at the repo level; sharing this runner between CI and deploys
    # for the same repo is within that boundary.
    runs-on: ubuntu-latest
    steps:
      # MUST be first: the composite actions below live under .gitea/actions/
      # and only exist on disk once the repo is checked out (ADR-029).
      - uses: actions/checkout@v4

      - name: Write staging env file
        run: |
          # NOTE(review): this step looks truncated in this copy. The heredoc
          # body that writes the secrets into .env.staging, and the command
          # that produces /tmp/compose-rendered.yml, are not visible here.
          # Restore both from version control: as written, the next line only
          # copies /tmp/compose-rendered.yml into .env.staging.
          cat > .env.staging < /tmp/compose-rendered.yml
          # Fail the deploy unless the rendered compose file gives the backend
          # an /import bind mount, and that mount is read-only (see #526).
          grep -q '^[[:space:]]*target: /import$' /tmp/compose-rendered.yml \
            || { echo "::error::backend is missing the /import bind mount (see #526)"; exit 1; }
          grep -A2 '^[[:space:]]*target: /import$' /tmp/compose-rendered.yml \
            | grep -q 'read_only: true' \
            || { echo "::error::backend /import mount is not read-only (see #526)"; exit 1; }

      - name: Build images
        # `--pull` forces re-fetching pinned base images so a CVE
        # re-publication of the same tag (e.g. node:20.19.0-alpine3.21,
        # postgres:16-alpine) is picked up instead of being served
        # from the host's stale Docker layer cache.
        run: |
          docker compose \
            -f docker-compose.prod.yml \
            -p archiv-staging \
            --env-file .env.staging \
            --profile staging \
            build --pull

      - name: Deploy staging
        run: |
          docker compose \
            -f docker-compose.prod.yml \
            -p archiv-staging \
            --env-file .env.staging \
            --profile staging \
            up -d --wait --remove-orphans

      # POSTGRES_HOST is derived from the Compose project name (archiv-staging)
      # and service name (db). A project rename requires updating this value.
      - uses: ./.gitea/actions/deploy-obs
        with:
          grafana_admin_password: ${{ secrets.GRAFANA_ADMIN_PASSWORD }}
          grafana_db_password: ${{ secrets.GRAFANA_DB_PASSWORD }}
          glitchtip_secret_key: ${{ secrets.GLITCHTIP_SECRET_KEY }}
          postgres_password: ${{ secrets.STAGING_POSTGRES_PASSWORD }}
          postgres_host: archiv-staging-db-1

      - uses: ./.gitea/actions/reload-caddy

      - uses: ./.gitea/actions/smoke-test
        with:
          host: staging.raddatz.cloud

      - name: Cleanup env file
        # LOAD-BEARING: `if: always()` is the linchpin of the ADR-011
        # single-tenant runner trust model. Every secret in .env.staging
        # is plain text on the runner filesystem until this step runs.
        # If a future refactor drops `if: always()`, a failed deploy
        # leaves the env-file behind. Do not remove this conditional
        # without first re-evaluating ADR-011.
        #
        # /tmp/compose-rendered.yml is removed as well: it is presumably the
        # interpolated compose config, which would embed values taken from
        # .env.staging — TODO confirm. `rm -f` is a no-op if it is absent.
        if: always()
        run: rm -f .env.staging /tmp/compose-rendered.yml

  npm-audit:
    # Independent parallel job — a deploy failure cannot mask the audit signal
    # and a clean audit cannot hide a broken deploy. Intentionally no `needs:`.
    #
    # Scans dev deps too (no --omit=dev), which is deliberately broader than the
    # PR gate (ci.yml §Security audit) that uses --omit=dev. A nightly broader
    # result is NOT a PR gate failure — it catches dev-tooling advisories (esbuild,
    # Vite, etc.) early. See docs/infrastructure/ci-gitea.md §Nightly audit vs PR gate.
    #
    # Required Gitea secrets:
    #   NIGHTLY_AUDIT_TOKEN — PAT with issues scope only. An issues-only token
    #                         means a leak via logs/process-args cannot push
    #                         branches, open PRs, or read repo contents (ADR-041).
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Assert jq is available
        # `command -v` is the POSIX lookup (no dependency on a `which` binary).
        # `apt-get update` runs first because `install` fails with "Unable to
        # locate package" on images that ship with empty package lists.
        run: |
          command -v jq \
            || { sudo apt-get update && sudo apt-get install -y jq; }

      - name: Run npm audit and file tracking issue on findings
        # Never run under set -x — NIGHTLY_AUDIT_TOKEN in env would leak to logs.
        env:
          NIGHTLY_AUDIT_TOKEN: ${{ secrets.NIGHTLY_AUDIT_TOKEN }}
        run: |
          MARKER="Nightly npm audit: high-severity advisory"
          GITEA_URL="${{ github.server_url }}"
          REPO="${{ github.repository }}"
          RUN_URL="${GITEA_URL}/${REPO}/actions/runs/${{ github.run_id }}"

          # --- Gitea API helper ---
          # api METHOD URL [extra curl args...] — authenticated Gitea API call.
          # Prints the response body on stdout; returns 0 on HTTP 2xx/3xx and
          # 1 otherwise.
          # `curl -sf` collapses every HTTP >=400 into a bare "exit 22", which
          # surfaces as an opaque step failure (issue #839). Instead we read the
          # status code and, on a >=400 response, print an actionable ::error::
          # to stderr (so a calling command substitution does not swallow it) and
          # return 1 — `set -e` then still fails the step. The token is never
          # echoed (no set -x; never placed in the message).
          # Transport failures (DNS, TLS, connection refused, cut-off transfer)
          # get the same treatment: curl's exit status is captured rather than
          # left to `set -e`, which would otherwise abort a direct `api …` call
          # before any ::error:: could be printed.
          api() {
            local method="$1" url="$2"; shift 2
            local resp http curl_rc=0
            resp=$(curl -s -w '\n%{http_code}' -X "$method" \
              -H "Authorization: token $NIGHTLY_AUDIT_TOKEN" "$@" -- "$url") || curl_rc=$?
            # Last line of the output is the status code; the rest is the body.
            http=${resp##*$'\n'}
            printf '%s' "${resp%$'\n'*}"
            if [ "$curl_rc" -ne 0 ]; then
              echo "::error::curl exited with status $curl_rc for $method ${url%%\?*} — Gitea was unreachable or the transfer was cut short." >&2
              return 1
            fi
            case "$http" in
              2*|3*)
                return 0 ;;
              401|403)
                echo "::error::Gitea returned HTTP $http for $method ${url%%\?*} — the NIGHTLY_AUDIT_TOKEN secret is missing, expired, or lacks issue read+write scope; recreate the renovate_bot PAT and update the secret." >&2
                return 1 ;;
              *)
                echo "::error::Gitea returned HTTP ${http:-(none)} for $method ${url%%\?*}." >&2
                return 1 ;;
            esac
          }

          # --- Self-test (mirrors ci.yml §Assert pattern) ---
          # Runs before any real API call so broken logic fails loudly early:
          #   (a) the jq title matcher used by the dedupe step — proves the regex
          #       only; the create-vs-update decision is exercised by the
          #       workflow_dispatch AC;
          #   (b) the api helper's HTTP-status handling, driven by a mocked curl so
          #       it needs no network — proves a 2xx returns the body and a >=400
          #       fails with an ::error:: instead of an opaque exit 22;
          #   (c) the api helper's transport-failure handling, again with a mocked
          #       curl — proves a non-zero curl exit yields an ::error::.
          echo "{\"title\": \"${MARKER}\"}" \
            | jq -e --arg m "$MARKER" '.title | test($m; "i")' > /dev/null \
            || { echo "FAIL: self-test — jq test() missed tracking issue title"; exit 1; }
          echo '{"title": "fix(deps): update dependency esbuild (CVE-2025-12345)"}' \
            | jq -e --arg m "$MARKER" '.title | test($m; "i") | not' > /dev/null \
            || { echo "FAIL: self-test — jq test() incorrectly matched unrelated title"; exit 1; }
          ( curl() { printf 'OK\n200'; }; [ "$(api GET selftest)" = "OK" ] ) \
            || { echo "FAIL: self-test — api helper dropped body on HTTP 200"; exit 1; }
          ( curl() { printf 'nope\n401'; }
            if api GET selftest >/dev/null 2>/tmp/api_selftest_err; then exit 1; fi
            grep -q '::error::' /tmp/api_selftest_err ) \
            || { echo "FAIL: self-test — api helper did not emit ::error:: on HTTP 401"; exit 1; }
          ( curl() { printf '\n000'; return 7; }
            if api GET selftest >/dev/null 2>/tmp/api_selftest_err; then exit 1; fi
            grep -q '::error::curl exited' /tmp/api_selftest_err ) \
            || { echo "FAIL: self-test — api helper did not emit ::error:: on curl failure"; exit 1; }
          echo "Self-test passed."

          # --- Run audit ---
          # No npm ci — audit needs only the lockfile (no install). It still
          # sends the dependency tree to the registry's advisory endpoint, so a
          # registry outage also lands in the non-zero branch below.
          set +e
          (cd frontend && npm audit --audit-level=high --json > /tmp/audit.json)
          AUDIT_EXIT=$?
          set -e

          if [ "$AUDIT_EXIT" -ne 0 ]; then
            # --- Build issue body with jq (never string-concat advisory text) ---
            # Advisory overview/title text is registry-controlled; string-concat
            # would be an injection/escaping vector into the API body. Truncate
            # raw excerpt to 500 chars so a pathological overview can't produce
            # a multi-MB PATCH body.
            #
            # NOTE(review): the "Raw audit excerpt" heading reads like the
            # summary of a collapsible <details> section, but no such markup is
            # present in this copy — confirm against version control.
            ISSUE_BODY=$(jq -r \
              --arg run_url "$RUN_URL" \
              '
              (.vulnerabilities // {}) as $vulns |
              ($vulns
                | to_entries
                | map(select(.value.severity == "high" or .value.severity == "critical"))
                | map("- **" + .key + "** (" + .value.severity + ")")
                | if length > 0 then join("\n") else "_See raw output for details._" end
              ) as $pkg_list |
              "## npm audit: high/critical advisories\n\n"
              + $pkg_list
              + "\n\n**Run:** " + $run_url
              + "\n\n\nRaw audit excerpt (first 500 chars)\n\n```\n"
              + (tostring | .[0:500])
              + "\n```\n\n\n"
              ' /tmp/audit.json)

            # --- Dedupe: fetch open security issues, match by title marker ---
            # Renovate vuln PRs also carry the "security" label, so >1 open
            # "security" issue WILL occur. Title-match (not just label) ensures
            # we deduplicate only our own tracking issue.
            OPEN_ISSUES=$(api GET \
              "${GITEA_URL}/api/v1/repos/${REPO}/issues?state=open&type=issues&labels=security&limit=50")
            MATCHED=$(echo "$OPEN_ISSUES" | jq \
              --arg m "$MARKER" \
              '[.[] | select(.title | test($m; "i"))] | sort_by(.created_at)')
            MATCH_COUNT=$(echo "$MATCHED" | jq 'length')

            if [ "$MATCH_COUNT" -gt 0 ]; then
              # Patch the oldest matched issue (append run URL to body).
              ISSUE_NUMBER=$(echo "$MATCHED" | jq -r '.[0].number')
              EXISTING_BODY=$(echo "$MATCHED" | jq -r '.[0].body')
              NEW_BODY=$(jq -n \
                --arg existing "$EXISTING_BODY" \
                --arg run_url "$RUN_URL" \
                '$existing + "\n\n---\n\nUpdated by run: " + $run_url')
              PAYLOAD=$(jq -n --arg body "$NEW_BODY" '{"body": $body}')
              api PATCH \
                "${GITEA_URL}/api/v1/repos/${REPO}/issues/${ISSUE_NUMBER}" \
                -H "Content-Type: application/json" \
                -d "$PAYLOAD" > /dev/null
              echo "Updated tracking issue #${ISSUE_NUMBER}"
            else
              # Closed prior issue that recurs → new issue (not reopened).
              # A re-opened issue would obscure when the advisory was re-discovered.
              PAYLOAD=$(jq -n \
                --arg title "$MARKER" \
                --arg body "$ISSUE_BODY" \
                '{"title": $title, "body": $body}')
              CREATED=$(api POST \
                "${GITEA_URL}/api/v1/repos/${REPO}/issues" \
                -H "Content-Type: application/json" \
                -d "$PAYLOAD")
              # `jq -e` fails on a missing/null .number, so the label call below
              # can never be sent to /issues/null/labels.
              NEW_NUMBER=$(echo "$CREATED" | jq -er '.number') \
                || { echo "::error::Gitea issue-create response did not contain an issue number."; exit 1; }
              echo "Opened new tracking issue #${NEW_NUMBER}"

              # Labels are ignored on issue create in Gitea — add in a follow-up call.
              LABEL_IDS=$(api GET \
                "${GITEA_URL}/api/v1/repos/${REPO}/labels?limit=50" \
                | jq '[.[] | select(.name == "security" or .name == "devops" or .name == "P1-high") | .id]')
              api POST \
                "${GITEA_URL}/api/v1/repos/${REPO}/issues/${NEW_NUMBER}/labels" \
                -H "Content-Type: application/json" \
                -d "{\"labels\": $LABEL_IDS}" > /dev/null
            fi

            # Propagate the audit's own exit status so the job is marked failed.
            exit "$AUDIT_EXIT"
          else
            # --- Heartbeat: proves the job ran and found nothing ---
            # "No issue created" is only meaningful evidence when paired with a
            # visible positive signal. Without this, a never-ran job is
            # indistinguishable from a clean run.
            #
            # $GITHUB_STEP_SUMMARY availability is unproven on this runner
            # (act_runner populates it, but this is the first run to verify it).
            # Guard before use so an unset variable does not fail the clean-path.
            MSG="✅ npm audit clean $(date -u)"
            if [ -n "${GITHUB_STEP_SUMMARY:-}" ]; then
              echo "$MSG" >> "$GITHUB_STEP_SUMMARY"
            fi
            echo "$MSG"
          fi