Separate parallel job (no `needs:`) so a deploy failure cannot mask the audit signal and vice versa. Scans dev deps (no --omit=dev) — deliberately broader than the PR gate; see ci-gitea.md §Nightly audit vs PR gate. Key behaviours: - Self-test the jq title-matcher before any API call (mirrors ci.yml guard pattern) - Survives non-zero exit: set +e captures AUDIT_EXIT before dedupe runs - Dedupes by MARKER in title (handles >1 open security issues from Renovate) - Patches oldest match or opens new issue; closed prior → new issue (expected) - JSON payload built entirely with jq — never string-concat advisory text - NIGHTLY_AUDIT_TOKEN passed via step env: only, never inline, never under set -x - Heartbeat on clean path (guards $GITHUB_STEP_SUMMARY availability — unproven) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
308 lines
14 KiB
YAML
308 lines
14 KiB
YAML
name: nightly
|
||
|
||
# Builds and deploys the staging environment from main every night.
|
||
# Runs on the self-hosted runner using Docker-out-of-Docker (the docker
|
||
# socket is mounted in), so `docker compose build` produces images on
|
||
# the host daemon and `docker compose up` consumes them directly — no
|
||
# registry hop.
|
||
#
|
||
# Operational assumptions (see docs/DEPLOYMENT.md §3 for the full setup):
|
||
#
|
||
# 1. Single-tenant self-hosted runner. The "Write staging env file" step
|
||
# writes every secret to .env.staging on the runner filesystem; the
|
||
# `if: always()` cleanup step removes it. A multi-tenant runner
|
||
# would need to switch to docker compose --env-file <(stdin) instead.
|
||
#
|
||
# 2. Host docker layer cache is authoritative. There is no
|
||
# actions/cache; we rely on the host daemon to keep Maven and npm
|
||
# layers warm between runs. A `docker system prune` on the host
|
||
# will cause the next nightly build to be cold (5–10 min slower).
|
||
#
|
||
# Staging environment isolation:
|
||
# - project name: archiv-staging
|
||
# - host ports: backend 8081, frontend 3001
|
||
# - profile: staging (starts mailpit instead of a real SMTP relay)
|
||
#
|
||
# The obs-stack deploy, Caddy reload, and smoke test are shared with
|
||
# release.yml via the composite actions under .gitea/actions/ (ADR-029).
|
||
# actions/checkout MUST stay the first step: a local `uses: ./…` action
|
||
# only exists on disk after checkout.
|
||
#
|
||
# Required Gitea secrets:
|
||
# STAGING_POSTGRES_PASSWORD
|
||
# STAGING_MINIO_PASSWORD
|
||
# STAGING_MINIO_APP_PASSWORD
|
||
# STAGING_OCR_TRAINING_TOKEN
|
||
# STAGING_APP_ADMIN_USERNAME
|
||
# STAGING_APP_ADMIN_PASSWORD
|
||
# GRAFANA_ADMIN_PASSWORD
|
||
# GRAFANA_DB_PASSWORD (read-only grafana_reader DB role, issue #651)
|
||
# GLITCHTIP_SECRET_KEY
|
||
# SENTRY_DSN (set after GlitchTip first-run; empty = Sentry disabled)
# VITE_SENTRY_DSN (also written to .env.staging by the "Write staging env file" step)
|
||
|
||
# Triggers: manual dispatch plus a daily schedule (minute 0, hour 2).
on:
  workflow_dispatch:
  schedule:
    - cron: '0 2 * * *'

env:
  # BuildKit has to be enabled for the backend Dockerfile's
  # `RUN --mount=type=cache` lines to take effect, which is what lets the
  # Maven cache survive from one run to the next.
  DOCKER_BUILDKIT: '1'
|
||
|
||
jobs:
|
||
deploy-staging:
  # Builds the staging images on the host daemon and (re)deploys the
  # `archiv-staging` compose project, then deploys the obs stack, reloads
  # Caddy and smoke-tests the public host.
  #
  # `ubuntu-latest` matches our self-hosted runner's advertised label
  # (the runner has labels: ubuntu-latest / ubuntu-24.04 / ubuntu-22.04).
  # `self-hosted` would never match — no runner advertises it — so the
  # job parks in the queue forever. ADR-011's "single-tenant" promise
  # is at the repo level; sharing this runner between CI and deploys
  # for the same repo is within that boundary.
  runs-on: ubuntu-latest
  steps:
    # MUST be first: the composite actions below live under .gitea/actions/
    # and only exist on disk once the repo is checked out (ADR-029).
    - uses: actions/checkout@v4

    - name: Write staging env file
      # The heredoc delimiter is quoted ('EOF') on purpose: the runner
      # substitutes the secrets expressions into the script text before
      # bash runs, and a quoted delimiter makes bash copy the body verbatim.
      # With an unquoted delimiter a `$`, backtick or backslash inside a
      # secret would be expanded (or executed) by the shell and the value
      # written to the file would be wrong.
      # NOTE(review): docker compose applies its own env-file parsing on
      # top of this — confirm secrets containing `$` survive that as well.
      #
      # `umask 077` keeps the plain-text secrets file readable by the
      # runner user only for the time it exists.
      run: |
        umask 077
        cat > .env.staging <<'EOF'
        TAG=nightly
        PORT_BACKEND=8081
        PORT_FRONTEND=3001
        APP_DOMAIN=staging.raddatz.cloud
        POSTGRES_PASSWORD=${{ secrets.STAGING_POSTGRES_PASSWORD }}
        MINIO_PASSWORD=${{ secrets.STAGING_MINIO_PASSWORD }}
        MINIO_APP_PASSWORD=${{ secrets.STAGING_MINIO_APP_PASSWORD }}
        OCR_TRAINING_TOKEN=${{ secrets.STAGING_OCR_TRAINING_TOKEN }}
        APP_ADMIN_USERNAME=${{ secrets.STAGING_APP_ADMIN_USERNAME }}
        APP_ADMIN_PASSWORD=${{ secrets.STAGING_APP_ADMIN_PASSWORD }}
        MAIL_HOST=mailpit
        MAIL_PORT=1025
        MAIL_USERNAME=
        MAIL_PASSWORD=
        MAIL_SMTP_AUTH=false
        MAIL_STARTTLS_ENABLE=false
        APP_MAIL_FROM=noreply@staging.raddatz.cloud
        IMPORT_HOST_DIR=/srv/familienarchiv-staging/import
        POSTGRES_USER=archiv
        SENTRY_DSN=${{ secrets.SENTRY_DSN }}
        VITE_SENTRY_DSN=${{ secrets.VITE_SENTRY_DSN }}
        GRAFANA_DB_PASSWORD=${{ secrets.GRAFANA_DB_PASSWORD }}
        EOF

    - name: Verify backend /import:ro mount is wired
      # Regression guard for #526: the /admin/system mass-import card
      # only works when the backend service mounts the host import
      # payload at /import (read-only). If a future "compose cleanup"
      # PR drops the volumes block, mass import silently breaks again.
      # `compose config` renders both shorthand and longform mounts as
      # `target: /import` + `read_only: true`, so we assert against
      # the rendered form rather than the raw source YAML.
      # App-compose check (not obs), nightly-only — stays inline.
      #
      # The rendered file is produced from .env.staging and may therefore
      # contain interpolated secret values; it is removed by the
      # "Cleanup env file" step at the end of the job.
      run: |
        set -e
        docker compose \
          -f docker-compose.prod.yml \
          -p archiv-staging \
          --env-file .env.staging \
          --profile staging \
          config > /tmp/compose-rendered.yml
        grep -q '^[[:space:]]*target: /import$' /tmp/compose-rendered.yml \
          || { echo "::error::backend is missing the /import bind mount (see #526)"; exit 1; }
        grep -A2 '^[[:space:]]*target: /import$' /tmp/compose-rendered.yml \
          | grep -q 'read_only: true' \
          || { echo "::error::backend /import mount is not read-only (see #526)"; exit 1; }

    - name: Build images
      # `--pull` forces re-fetching pinned base images so a CVE
      # re-publication of the same tag (e.g. node:20.19.0-alpine3.21,
      # postgres:16-alpine) is picked up instead of being served
      # from the host's stale Docker layer cache.
      run: |
        docker compose \
          -f docker-compose.prod.yml \
          -p archiv-staging \
          --env-file .env.staging \
          --profile staging \
          build --pull

    - name: Deploy staging
      # `--wait` blocks until the services report running/healthy;
      # `--remove-orphans` drops containers for services that are no longer
      # defined in the compose file.
      run: |
        docker compose \
          -f docker-compose.prod.yml \
          -p archiv-staging \
          --env-file .env.staging \
          --profile staging \
          up -d --wait --remove-orphans

    # POSTGRES_HOST is derived from the Compose project name (archiv-staging)
    # and service name (db). A project rename requires updating this value.
    - uses: ./.gitea/actions/deploy-obs
      with:
        grafana_admin_password: ${{ secrets.GRAFANA_ADMIN_PASSWORD }}
        grafana_db_password: ${{ secrets.GRAFANA_DB_PASSWORD }}
        glitchtip_secret_key: ${{ secrets.GLITCHTIP_SECRET_KEY }}
        postgres_password: ${{ secrets.STAGING_POSTGRES_PASSWORD }}
        postgres_host: archiv-staging-db-1

    - uses: ./.gitea/actions/reload-caddy

    - uses: ./.gitea/actions/smoke-test
      with:
        host: staging.raddatz.cloud

    - name: Cleanup env file
      # LOAD-BEARING: `if: always()` is the linchpin of the ADR-011
      # single-tenant runner trust model. Every secret in .env.staging
      # is plain text on the runner filesystem until this step runs.
      # If a future refactor drops `if: always()`, a failed deploy
      # leaves the env-file behind. Do not remove this conditional
      # without first re-evaluating ADR-011.
      #
      # /tmp/compose-rendered.yml (written by the /import mount check) is
      # rendered from the same env file, so it is removed here as well.
      if: always()
      run: rm -f .env.staging /tmp/compose-rendered.yml
|
||
|
||
npm-audit:
  # Independent parallel job — a deploy failure cannot mask the audit signal
  # and a clean audit cannot hide a broken deploy. Intentionally no `needs:`.
  #
  # Scans dev deps too (no --omit=dev), which is deliberately broader than the
  # PR gate (ci.yml §Security audit) that uses --omit=dev. A nightly broader
  # result is NOT a PR gate failure — it catches dev-tooling advisories (esbuild,
  # Vite, etc.) early. See docs/infrastructure/ci-gitea.md §Nightly audit vs PR gate.
  #
  # Required Gitea secrets:
  #   NIGHTLY_AUDIT_TOKEN — PAT with issues scope only. An issues-only token
  #                         means a leak via logs/process-args cannot push
  #                         branches, open PRs, or read repo contents (ADR-041).
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - name: Assert jq is available
      # `apt-get update` runs before the install: on an image whose package
      # lists are missing or stale, a bare `apt-get install` cannot locate
      # the package and the step would fail.
      run: |
        command -v jq > /dev/null 2>&1 \
          || { sudo apt-get update && sudo apt-get install -y jq; }

    - name: Run npm audit and file tracking issue on findings
      # Never run under set -x — NIGHTLY_AUDIT_TOKEN in env would leak to logs.
      env:
        NIGHTLY_AUDIT_TOKEN: ${{ secrets.NIGHTLY_AUDIT_TOKEN }}
      run: |
        MARKER="Nightly npm audit: high-severity advisory"
        GITEA_URL="${{ github.server_url }}"
        REPO="${{ github.repository }}"
        RUN_URL="${GITEA_URL}/${REPO}/actions/runs/${{ github.run_id }}"

        # --- Self-test (mirrors ci.yml §Assert pattern) ---
        # Tests the exact jq test() call used in the dedupe step, before any
        # API call, so a broken matcher fails loudly early rather than silently
        # opening duplicate issues. Proves the regex only — create-vs-update
        # decision is exercised by the workflow_dispatch AC.
        echo "{\"title\": \"${MARKER}\"}" \
          | jq -e --arg m "$MARKER" '.title | test($m; "i")' > /dev/null \
          || { echo "FAIL: self-test — jq test() missed tracking issue title"; exit 1; }
        echo '{"title": "fix(deps): update dependency esbuild (CVE-2025-12345)"}' \
          | jq -e --arg m "$MARKER" '.title | test($m; "i") | not' > /dev/null \
          || { echo "FAIL: self-test — jq test() incorrectly matched unrelated title"; exit 1; }
        echo "Self-test passed."

        # --- Run audit ---
        # No npm ci — audit works from the lockfile, so nothing is installed.
        # It still needs network access: the advisories come from the registry.
        # set +e so a non-zero exit is captured instead of aborting the script.
        set +e
        (cd frontend && npm audit --audit-level=high --json > /tmp/audit.json)
        AUDIT_EXIT=$?
        set -e

        if [ "$AUDIT_EXIT" -ne 0 ]; then
          # npm audit also exits non-zero when the audit itself failed
          # (registry unreachable, missing lockfile, ...). In that case the
          # output is not a report — it is empty, not JSON, or an object with
          # an "error" key. Fail the job, but do not file a "high-severity
          # advisory" issue for what is really an infrastructure problem.
          if ! jq -e 'type == "object" and (has("error") | not)' /tmp/audit.json > /dev/null 2>&1; then
            echo "::error::npm audit did not produce a report (exit ${AUDIT_EXIT}); no tracking issue filed"
            exit "$AUDIT_EXIT"
          fi

          # --- Build issue body with jq (never string-concat advisory text) ---
          # Advisory overview/title text is registry-controlled; string-concat
          # would be an injection/escaping vector into the API body. Truncate
          # raw excerpt to 500 chars so a pathological overview can't produce
          # a multi-MB PATCH body.
          ISSUE_BODY=$(jq -r \
            --arg run_url "$RUN_URL" \
            '
            (.vulnerabilities // {}) as $vulns |
            ($vulns | to_entries |
              map(select(.value.severity == "high" or .value.severity == "critical")) |
              map("- **" + .key + "** (" + .value.severity + ")") |
              if length > 0 then join("\n") else "_See raw output for details._" end) as $pkg_list |
            "## npm audit: high/critical advisories\n\n" + $pkg_list +
            "\n\n**Run:** " + $run_url +
            "\n\n<details><summary>Raw audit excerpt (first 500 chars)</summary>\n\n```\n" +
            (tostring | .[0:500]) +
            "\n```\n\n</details>"
            ' /tmp/audit.json)

          # --- Dedupe: fetch open security issues, match by title marker ---
          # Renovate vuln PRs also carry the "security" label, so >1 open
          # "security" issue WILL occur. Title-match (not just label) ensures
          # we deduplicate only our own tracking issue.
          OPEN_ISSUES=$(curl -sf \
            -H "Authorization: token $NIGHTLY_AUDIT_TOKEN" \
            "${GITEA_URL}/api/v1/repos/${REPO}/issues?state=open&type=issues&labels=security&limit=50")

          MATCHED=$(echo "$OPEN_ISSUES" | jq \
            --arg m "$MARKER" \
            '[.[] | select(.title | test($m; "i"))] | sort_by(.created_at)')
          MATCH_COUNT=$(echo "$MATCHED" | jq 'length')

          if [ "$MATCH_COUNT" -gt 0 ]; then
            # Patch the oldest matched issue (append run URL to body).
            # `// ""` keeps a null body from turning into the text "null".
            ISSUE_NUMBER=$(echo "$MATCHED" | jq -r '.[0].number')
            EXISTING_BODY=$(echo "$MATCHED" | jq -r '.[0].body // ""')
            NEW_BODY=$(jq -n \
              --arg existing "$EXISTING_BODY" \
              --arg run_url "$RUN_URL" \
              '$existing + "\n\n---\n\nUpdated by run: " + $run_url')
            PAYLOAD=$(jq -n --arg body "$NEW_BODY" '{"body": $body}')
            curl -sf -X PATCH \
              -H "Authorization: token $NIGHTLY_AUDIT_TOKEN" \
              -H "Content-Type: application/json" \
              -d "$PAYLOAD" \
              "${GITEA_URL}/api/v1/repos/${REPO}/issues/${ISSUE_NUMBER}" > /dev/null
            echo "Updated tracking issue #${ISSUE_NUMBER}"
          else
            # Closed prior issue that recurs → new issue (not reopened).
            # A re-opened issue would obscure when the advisory was re-discovered.
            PAYLOAD=$(jq -n \
              --arg title "$MARKER" \
              --arg body "$ISSUE_BODY" \
              '{"title": $title, "body": $body}')
            CREATED=$(curl -sf -X POST \
              -H "Authorization: token $NIGHTLY_AUDIT_TOKEN" \
              -H "Content-Type: application/json" \
              -d "$PAYLOAD" \
              "${GITEA_URL}/api/v1/repos/${REPO}/issues")
            NEW_NUMBER=$(echo "$CREATED" | jq -r '.number')
            echo "Opened new tracking issue #${NEW_NUMBER}"

            # Labels are ignored on issue create in Gitea — add in a follow-up call.
            # Fetch first, then build the payload with jq: a curl failure is
            # not hidden behind a pipe, and the JSON is never hand-assembled.
            REPO_LABELS=$(curl -sf \
              -H "Authorization: token $NIGHTLY_AUDIT_TOKEN" \
              "${GITEA_URL}/api/v1/repos/${REPO}/labels?limit=50")
            LABEL_PAYLOAD=$(echo "$REPO_LABELS" | jq -c \
              '{labels: [.[] | select(.name == "security" or .name == "devops" or .name == "P1-high") | .id]}')
            # The dedupe query above filters on the "security" label, so an
            # unlabelled tracking issue would be invisible to the next run and
            # a duplicate would be opened. Fail loudly instead.
            if ! echo "$LABEL_PAYLOAD" | jq -e '.labels | length > 0' > /dev/null; then
              echo "::error::no security/devops/P1-high label found; issue #${NEW_NUMBER} is unlabelled"
              exit 1
            fi
            curl -sf -X POST \
              -H "Authorization: token $NIGHTLY_AUDIT_TOKEN" \
              -H "Content-Type: application/json" \
              -d "$LABEL_PAYLOAD" \
              "${GITEA_URL}/api/v1/repos/${REPO}/issues/${NEW_NUMBER}/labels" > /dev/null
          fi

          # Propagate the audit result so the job is red when findings exist.
          exit "$AUDIT_EXIT"

        else
          # --- Heartbeat: proves the job ran and found nothing ---
          # "No issue created" is only meaningful evidence when paired with a
          # visible positive signal. Without this, a never-ran job is
          # indistinguishable from a clean run.
          #
          # $GITHUB_STEP_SUMMARY availability is unproven on this runner
          # (act_runner populates it, but this is the first run to verify it).
          # Guard before use so an unset variable does not fail the clean-path.
          MSG="✅ npm audit clean $(date -u)"
          if [ -n "${GITHUB_STEP_SUMMARY:-}" ]; then
            echo "$MSG" >> "$GITHUB_STEP_SUMMARY"
          fi
          echo "$MSG"
        fi
|