From d7d225af7754634d995e71adad0809c51d5905e5 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 15 May 2026 11:22:37 +0200 Subject: [PATCH 1/8] devops(observability): wire observability stack into nightly and release deploys - docker-compose.prod.yml: add `name: archiv-net` so the network has a stable Docker name regardless of compose project name (-p flag). Both staging and production share the same host-level network, which is correct since the observability stack is a single shared instance. - nightly.yml / release.yml: add observability env vars (POSTGRES_USER, PORT_GRAFANA=3003, PORT_GLITCHTIP=3002, PORT_PROMETHEUS=9090, GRAFANA_ADMIN_PASSWORD, GLITCHTIP_SECRET_KEY, GLITCHTIP_DOMAIN) to the env file, then `docker compose -f docker-compose.observability.yml up -d` after the app deploy step. PORT_GRAFANA=3003 avoids collision with staging frontend on 3001. Requires two new Gitea secrets: GRAFANA_ADMIN_PASSWORD, GLITCHTIP_SECRET_KEY. Co-Authored-By: Claude Sonnet 4.6 --- .gitea/workflows/nightly.yml | 14 ++++++++++++++ .gitea/workflows/release.yml | 14 ++++++++++++++ docker-compose.prod.yml | 1 + 3 files changed, 29 insertions(+) diff --git a/.gitea/workflows/nightly.yml b/.gitea/workflows/nightly.yml index 86564f9c..ccc691a3 100644 --- a/.gitea/workflows/nightly.yml +++ b/.gitea/workflows/nightly.yml @@ -74,6 +74,13 @@ jobs: MAIL_STARTTLS_ENABLE=false APP_MAIL_FROM=noreply@staging.raddatz.cloud IMPORT_HOST_DIR=/srv/familienarchiv-staging/import + POSTGRES_USER=archiv + PORT_GRAFANA=3003 + PORT_GLITCHTIP=3002 + PORT_PROMETHEUS=9090 + GRAFANA_ADMIN_PASSWORD=${{ secrets.GRAFANA_ADMIN_PASSWORD }} + GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }} + GLITCHTIP_DOMAIN=https://glitchtip.archiv.raddatz.cloud EOF - name: Verify backend /import:ro mount is wired @@ -120,6 +127,13 @@ jobs: --profile staging \ up -d --wait --remove-orphans + - name: Start observability stack + run: | + docker compose \ + -f docker-compose.observability.yml \ + --env-file .env.staging 
\ + up -d + - name: Reload Caddy # Apply any committed Caddyfile changes before smoke-testing the # public surface. Without this step, a Caddyfile edit lands in the diff --git a/.gitea/workflows/release.yml b/.gitea/workflows/release.yml index d980ca10..9ef65d14 100644 --- a/.gitea/workflows/release.yml +++ b/.gitea/workflows/release.yml @@ -72,6 +72,13 @@ jobs: MAIL_STARTTLS_ENABLE=true APP_MAIL_FROM=noreply@raddatz.cloud IMPORT_HOST_DIR=/srv/familienarchiv-production/import + POSTGRES_USER=archiv + PORT_GRAFANA=3003 + PORT_GLITCHTIP=3002 + PORT_PROMETHEUS=9090 + GRAFANA_ADMIN_PASSWORD=${{ secrets.GRAFANA_ADMIN_PASSWORD }} + GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }} + GLITCHTIP_DOMAIN=https://glitchtip.archiv.raddatz.cloud EOF - name: Build images @@ -93,6 +100,13 @@ jobs: --env-file .env.production \ up -d --wait --remove-orphans + - name: Start observability stack + run: | + docker compose \ + -f docker-compose.observability.yml \ + --env-file .env.production \ + up -d + - name: Reload Caddy # See nightly.yml — same rationale and mechanism: DooD job containers # cannot call systemctl directly; nsenter via a privileged sibling diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index e8687d45..73139252 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -39,6 +39,7 @@ networks: archiv-net: driver: bridge + name: archiv-net volumes: postgres-data: -- 2.49.1 From 4c8a23ff14fc4b9bb9215ae5ebb1975d06ee7cb3 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 15 May 2026 11:27:07 +0200 Subject: [PATCH 2/8] devops(caddy): add Grafana and GlitchTip vhosts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit grafana.archiv.raddatz.cloud → 127.0.0.1:3003 (with security headers) glitchtip.archiv.raddatz.cloud → 127.0.0.1:3002 (no security headers — GlitchTip manages its own; the Sentry SDK also POSTs here) Requires A records for both subdomains pointing at the server before the next `systemctl 
reload caddy`. Co-Authored-By: Claude Sonnet 4.6 --- infra/caddy/Caddyfile | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/infra/caddy/Caddyfile b/infra/caddy/Caddyfile index 2c0c0757..4477301e 100644 --- a/infra/caddy/Caddyfile +++ b/infra/caddy/Caddyfile @@ -88,3 +88,12 @@ git.raddatz.cloud { import security_headers reverse_proxy 127.0.0.1:3005 } + +grafana.archiv.raddatz.cloud { + import security_headers + reverse_proxy 127.0.0.1:3003 +} + +glitchtip.archiv.raddatz.cloud { + reverse_proxy 127.0.0.1:3002 +} -- 2.49.1 From b137e3e72d256fe5e86c81d71abf8cd95936fa73 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 15 May 2026 13:43:35 +0200 Subject: [PATCH 3/8] devops(caddy): add HSTS to GlitchTip vhost Caddy does not set Strict-Transport-Security on GlitchTip because the full security_headers snippet is intentionally omitted (Permissions-Policy interferes with the Sentry SDK CORS). Adding HSTS alone guarantees HTTPS enforcement at the Caddy layer without breaking SDK ingestion. Co-Authored-By: Claude Sonnet 4.6 --- infra/caddy/Caddyfile | 1 + 1 file changed, 1 insertion(+) diff --git a/infra/caddy/Caddyfile b/infra/caddy/Caddyfile index 4477301e..6c27bf7d 100644 --- a/infra/caddy/Caddyfile +++ b/infra/caddy/Caddyfile @@ -95,5 +95,6 @@ grafana.archiv.raddatz.cloud { } glitchtip.archiv.raddatz.cloud { + header Strict-Transport-Security "max-age=31536000; includeSubDomains; preload" reverse_proxy 127.0.0.1:3002 } -- 2.49.1 From f15e0046451582abd192c3dc2adf9a510f809495 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 15 May 2026 13:44:16 +0200 Subject: [PATCH 4/8] devops(ci): add --wait to observability stack startup Prometheus, Loki, Tempo, and Grafana all define healthchecks in docker-compose.observability.yml. Without --wait, the step exits 0 as soon as containers are created, masking startup failures silently. 
Co-Authored-By: Claude Sonnet 4.6 --- .gitea/workflows/nightly.yml | 2 +- .gitea/workflows/release.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitea/workflows/nightly.yml b/.gitea/workflows/nightly.yml index ccc691a3..030b38eb 100644 --- a/.gitea/workflows/nightly.yml +++ b/.gitea/workflows/nightly.yml @@ -132,7 +132,7 @@ jobs: docker compose \ -f docker-compose.observability.yml \ --env-file .env.staging \ - up -d + up -d --wait - name: Reload Caddy # Apply any committed Caddyfile changes before smoke-testing the diff --git a/.gitea/workflows/release.yml b/.gitea/workflows/release.yml index 9ef65d14..f7d631c3 100644 --- a/.gitea/workflows/release.yml +++ b/.gitea/workflows/release.yml @@ -105,7 +105,7 @@ jobs: docker compose \ -f docker-compose.observability.yml \ --env-file .env.production \ - up -d + up -d --wait - name: Reload Caddy # See nightly.yml — same rationale and mechanism: DooD job containers -- 2.49.1 From 4a7349543aac01c8c8858b4c970554c03caa3436 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 15 May 2026 13:45:07 +0200 Subject: [PATCH 5/8] devops(ci): wire SENTRY_DSN into staging and production env files Adds SENTRY_DSN as an optional secret (empty by default) so it can be set after GlitchTip first-run without requiring another code change. Backend reads it via application.yaml; empty value keeps Sentry disabled. 
Co-Authored-By: Claude Sonnet 4.6 --- .gitea/workflows/nightly.yml | 4 ++++ .gitea/workflows/release.yml | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/.gitea/workflows/nightly.yml b/.gitea/workflows/nightly.yml index 030b38eb..838d859f 100644 --- a/.gitea/workflows/nightly.yml +++ b/.gitea/workflows/nightly.yml @@ -30,6 +30,9 @@ name: nightly # STAGING_OCR_TRAINING_TOKEN # STAGING_APP_ADMIN_USERNAME # STAGING_APP_ADMIN_PASSWORD +# GRAFANA_ADMIN_PASSWORD +# GLITCHTIP_SECRET_KEY +# SENTRY_DSN (set after GlitchTip first-run; empty = Sentry disabled) on: schedule: @@ -81,6 +84,7 @@ jobs: GRAFANA_ADMIN_PASSWORD=${{ secrets.GRAFANA_ADMIN_PASSWORD }} GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }} GLITCHTIP_DOMAIN=https://glitchtip.archiv.raddatz.cloud + SENTRY_DSN=${{ secrets.SENTRY_DSN }} EOF - name: Verify backend /import:ro mount is wired diff --git a/.gitea/workflows/release.yml b/.gitea/workflows/release.yml index f7d631c3..16da676f 100644 --- a/.gitea/workflows/release.yml +++ b/.gitea/workflows/release.yml @@ -34,6 +34,9 @@ name: release # MAIL_PORT # MAIL_USERNAME # MAIL_PASSWORD +# GRAFANA_ADMIN_PASSWORD +# GLITCHTIP_SECRET_KEY +# SENTRY_DSN (set after GlitchTip first-run; empty = Sentry disabled) on: push: @@ -79,6 +82,7 @@ jobs: GRAFANA_ADMIN_PASSWORD=${{ secrets.GRAFANA_ADMIN_PASSWORD }} GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }} GLITCHTIP_DOMAIN=https://glitchtip.archiv.raddatz.cloud + SENTRY_DSN=${{ secrets.SENTRY_DSN }} EOF - name: Build images -- 2.49.1 From 553e2f8898c33b3d4e0c822d74cb9b46c59e9ab7 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 15 May 2026 13:46:01 +0200 Subject: [PATCH 6/8] =?UTF-8?q?docs(deployment):=20add=20observability=20s?= =?UTF-8?q?ecrets=20to=20=C2=A73.3=20Gitea=20secrets=20table?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GRAFANA_ADMIN_PASSWORD, GLITCHTIP_SECRET_KEY, and SENTRY_DSN were referenced in the workflow env files but absent from the 
secrets table, leaving the first-run operator without a complete checklist. Co-Authored-By: Claude Sonnet 4.6 --- docs/DEPLOYMENT.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index 32e47798..b6845cf3 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -223,6 +223,9 @@ git.raddatz.cloud A | `MAIL_PORT` | release.yml | typically `587` | | `MAIL_USERNAME` | release.yml | SMTP user | | `MAIL_PASSWORD` | release.yml | SMTP password | +| `GRAFANA_ADMIN_PASSWORD` | both | Grafana `admin` login — generate a strong password | +| `GLITCHTIP_SECRET_KEY` | both | Django secret key — `openssl rand -hex 32` | +| `SENTRY_DSN` | both | GlitchTip project DSN — set after first-run (§4); leave empty to keep Sentry disabled | ### 3.4 First deploy -- 2.49.1 From 8cf3a2a7263c2e45aa2fecf12bb1922e1e21c2f3 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 15 May 2026 14:54:54 +0200 Subject: [PATCH 7/8] devops(caddy): apply full security_headers snippet to GlitchTip vhost The GlitchTip vhost only had a manual HSTS header; the rest of the (security_headers) snippet (X-Content-Type-Options, Referrer-Policy, Permissions-Policy, -Server removal) was missing. 
Co-Authored-By: Claude Sonnet 4.6 --- infra/caddy/Caddyfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/caddy/Caddyfile b/infra/caddy/Caddyfile index 6c27bf7d..b3d1e971 100644 --- a/infra/caddy/Caddyfile +++ b/infra/caddy/Caddyfile @@ -95,6 +95,6 @@ grafana.archiv.raddatz.cloud { } glitchtip.archiv.raddatz.cloud { - header Strict-Transport-Security "max-age=31536000; includeSubDomains; preload" + import security_headers reverse_proxy 127.0.0.1:3002 } -- 2.49.1 From ada3a3ccaf57fabb147ad98b1487d69027f60e12 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 15 May 2026 14:55:28 +0200 Subject: [PATCH 8/8] devops(ci): add --remove-orphans to observability stack deploy steps Both nightly and release workflows were missing --remove-orphans on the observability compose up, while the main app deploy step already had it. Without it, containers removed from docker-compose.observability.yml linger as orphaned containers until manually removed. Co-Authored-By: Claude Sonnet 4.6 --- .gitea/workflows/nightly.yml | 2 +- .gitea/workflows/release.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitea/workflows/nightly.yml b/.gitea/workflows/nightly.yml index 838d859f..81cf885c 100644 --- a/.gitea/workflows/nightly.yml +++ b/.gitea/workflows/nightly.yml @@ -136,7 +136,7 @@ jobs: docker compose \ -f docker-compose.observability.yml \ --env-file .env.staging \ - up -d --wait + up -d --wait --remove-orphans - name: Reload Caddy # Apply any committed Caddyfile changes before smoke-testing the diff --git a/.gitea/workflows/release.yml b/.gitea/workflows/release.yml index 16da676f..2645dc15 100644 --- a/.gitea/workflows/release.yml +++ b/.gitea/workflows/release.yml @@ -109,7 +109,7 @@ jobs: docker compose \ -f docker-compose.observability.yml \ --env-file .env.production \ - up -d --wait + up -d --wait --remove-orphans - name: Reload Caddy # See nightly.yml — same rationale and mechanism: DooD job containers -- 2.49.1