From f4ffd8aceeb44017d63d601a3d23d07541fd480c Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 21 May 2026 19:20:39 +0200 Subject: [PATCH 01/13] feat(observability): create grafana_reader read-only DB role Add Flyway V68 migration that provisions a read-only PostgreSQL role scoped to audit_log, documents, and transcription_blocks. The role's password is injected via the new ${grafanaDbPassword} Flyway placeholder, which FlywayConfig reads from the GRAFANA_DB_PASSWORD env var. The migration is idempotent: CREATE on first run, ALTER on re-run. Adds a Testcontainers integration test asserting positive grants on the three intended tables and a negative grant on app_users (NFR-SEC-01). Refs #651. Co-Authored-By: Claude Opus 4.7 --- .../familienarchiv/config/FlywayConfig.java | 14 ++++++ .../V68__add_grafana_reader_role.sql | 17 +++++++ .../GrafanaReaderRoleIntegrationTest.java | 47 +++++++++++++++++++ 3 files changed, 78 insertions(+) create mode 100644 backend/src/main/resources/db/migration/V68__add_grafana_reader_role.sql create mode 100644 backend/src/test/java/org/raddatz/familienarchiv/config/GrafanaReaderRoleIntegrationTest.java diff --git a/backend/src/main/java/org/raddatz/familienarchiv/config/FlywayConfig.java b/backend/src/main/java/org/raddatz/familienarchiv/config/FlywayConfig.java index ad0f63fe..add9c38c 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/config/FlywayConfig.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/config/FlywayConfig.java @@ -7,12 +7,15 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import javax.sql.DataSource; +import java.util.Map; @Configuration @RequiredArgsConstructor @Slf4j public class FlywayConfig { + private static final String GRAFANA_DB_PASSWORD_FALLBACK = "changeme-grafana-db-password"; + private final DataSource dataSource; @Bean(name = "flyway") @@ -21,6 +24,7 @@ public class FlywayConfig { Flyway flyway = Flyway.configure() 
.dataSource(dataSource) .locations("classpath:db/migration") + .placeholders(Map.of("grafanaDbPassword", resolveGrafanaDbPassword())) .baselineOnMigrate(true) .baselineVersion("4") .load(); @@ -28,4 +32,14 @@ public class FlywayConfig { log.info("Flyway: {} migration(s) applied.", result.migrationsExecuted); return flyway; } + + private String resolveGrafanaDbPassword() { + String value = System.getenv("GRAFANA_DB_PASSWORD"); + if (value == null || value.isBlank()) { + log.warn("GRAFANA_DB_PASSWORD is not set; the grafana_reader role will use a non-secret fallback. " + + "Set GRAFANA_DB_PASSWORD in production to enable the Grafana PostgreSQL datasource."); + return GRAFANA_DB_PASSWORD_FALLBACK; + } + return value; + } } diff --git a/backend/src/main/resources/db/migration/V68__add_grafana_reader_role.sql b/backend/src/main/resources/db/migration/V68__add_grafana_reader_role.sql new file mode 100644 index 00000000..ffb185fa --- /dev/null +++ b/backend/src/main/resources/db/migration/V68__add_grafana_reader_role.sql @@ -0,0 +1,17 @@ +-- Read-only role used by the Grafana PostgreSQL datasource for the PO Overview +-- dashboard (issue #651). Password is injected at migration time via the Flyway +-- placeholder ${grafanaDbPassword}, supplied by FlywayConfig from the +-- GRAFANA_DB_PASSWORD environment variable. 
+DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_catalog.pg_roles WHERE rolname = 'grafana_reader') THEN + EXECUTE format('CREATE ROLE grafana_reader WITH LOGIN PASSWORD %L', '${grafanaDbPassword}'); + ELSE + EXECUTE format('ALTER ROLE grafana_reader WITH LOGIN PASSWORD %L', '${grafanaDbPassword}'); + END IF; +END +$$; + +GRANT CONNECT ON DATABASE ${flyway:database} TO grafana_reader; +GRANT USAGE ON SCHEMA public TO grafana_reader; +GRANT SELECT ON audit_log, documents, transcription_blocks TO grafana_reader; diff --git a/backend/src/test/java/org/raddatz/familienarchiv/config/GrafanaReaderRoleIntegrationTest.java b/backend/src/test/java/org/raddatz/familienarchiv/config/GrafanaReaderRoleIntegrationTest.java new file mode 100644 index 00000000..f930b096 --- /dev/null +++ b/backend/src/test/java/org/raddatz/familienarchiv/config/GrafanaReaderRoleIntegrationTest.java @@ -0,0 +1,47 @@ +package org.raddatz.familienarchiv.config; + +import org.junit.jupiter.api.Test; +import org.raddatz.familienarchiv.PostgresContainerConfig; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.data.jpa.test.autoconfigure.DataJpaTest; +import org.springframework.boot.jdbc.test.autoconfigure.AutoConfigureTestDatabase; +import org.springframework.context.annotation.Import; +import org.springframework.jdbc.core.JdbcTemplate; + +import static org.assertj.core.api.Assertions.assertThat; + +@DataJpaTest +@AutoConfigureTestDatabase(replace = AutoConfigureTestDatabase.Replace.NONE) +@Import({PostgresContainerConfig.class, FlywayConfig.class}) +class GrafanaReaderRoleIntegrationTest { + + @Autowired JdbcTemplate jdbc; + + @Test + void grafana_reader_has_select_on_audit_log() { + assertThat(hasSelect("audit_log")).isTrue(); + } + + @Test + void grafana_reader_has_select_on_documents() { + assertThat(hasSelect("documents")).isTrue(); + } + + @Test + void grafana_reader_has_select_on_transcription_blocks() { + 
assertThat(hasSelect("transcription_blocks")).isTrue(); + } + + @Test + void grafana_reader_has_no_select_on_app_users() { + assertThat(hasSelect("app_users")).isFalse(); + } + + private boolean hasSelect(String table) { + Boolean result = jdbc.queryForObject( + "SELECT has_table_privilege('grafana_reader', ?, 'SELECT')", + Boolean.class, + table); + return Boolean.TRUE.equals(result); + } +} -- 2.49.1 From 0958df7768c008a604852086dd86d37683434b97 Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 21 May 2026 19:21:25 +0200 Subject: [PATCH 02/13] feat(observability): wire obs-grafana to archive-db and inject GRAFANA_DB_PASSWORD obs-grafana now joins archiv-net so it can resolve archive-db:5432 for the PO Overview dashboard's PostgreSQL datasource, and receives GRAFANA_DB_PASSWORD so provisioning can interpolate it into the datasource config. Refs #651. Co-Authored-By: Claude Opus 4.7 --- docker-compose.observability.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docker-compose.observability.yml b/docker-compose.observability.yml index 8ec33d30..a16831c9 100644 --- a/docker-compose.observability.yml +++ b/docker-compose.observability.yml @@ -147,6 +147,9 @@ services: GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-changeme} GF_USERS_ALLOW_SIGN_UP: "false" GF_SERVER_ROOT_URL: ${GF_SERVER_ROOT_URL:-http://localhost:3003} + # Read-only password for the grafana_reader PostgreSQL role; interpolated + # into the provisioned PostgreSQL datasource (see datasources.yml). 
+ GRAFANA_DB_PASSWORD: ${GRAFANA_DB_PASSWORD} volumes: - grafana_data:/var/lib/grafana - ./infra/observability/grafana/provisioning:/etc/grafana/provisioning:ro @@ -165,6 +168,7 @@ services: condition: service_healthy networks: - obs-net + - archiv-net # PO Overview dashboard queries archive-db via the grafana_reader role # --- Error Tracking: GlitchTip --- -- 2.49.1 From ed8e9576e46e40ea07022b323b5480aec565e3ce Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 21 May 2026 19:22:02 +0200 Subject: [PATCH 03/13] feat(observability): pass GRAFANA_DB_PASSWORD to archive-backend Flyway runs inside the backend container at startup; V68's ${grafanaDbPassword} placeholder is resolved from this env var. Refs #651. Co-Authored-By: Claude Opus 4.7 --- docker-compose.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index 7bc27dbe..e18a6b40 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -163,6 +163,9 @@ services: SPRING_DATASOURCE_URL: jdbc:postgresql://db:5432/${POSTGRES_DB} SPRING_DATASOURCE_USERNAME: ${POSTGRES_USER} SPRING_DATASOURCE_PASSWORD: ${POSTGRES_PASSWORD} + # Consumed by Flyway V68 via the ${grafanaDbPassword} placeholder to set + # the read-only grafana_reader role's password. + GRAFANA_DB_PASSWORD: ${GRAFANA_DB_PASSWORD} S3_ENDPOINT: http://minio:9000 S3_ACCESS_KEY: ${MINIO_ROOT_USER} S3_SECRET_KEY: ${MINIO_ROOT_PASSWORD} -- 2.49.1 From ab2708e63b7d3cc0565e1d4d7fedc6d657839ad2 Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 21 May 2026 19:22:35 +0200 Subject: [PATCH 04/13] feat(observability): provision Grafana PostgreSQL datasource Adds a read-only datasource pointing at archive-db using the grafana_reader role (provisioned by Flyway V68). The password is interpolated from the GRAFANA_DB_PASSWORD env var passed to obs-grafana, and the connection is locked to editable: false so the credential cannot be inspected via the UI. sslmode=disable is intentional: traffic stays inside archiv-net. Refs #651. 
Co-Authored-By: Claude Opus 4.7 --- .../provisioning/datasources/datasources.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/infra/observability/grafana/provisioning/datasources/datasources.yml b/infra/observability/grafana/provisioning/datasources/datasources.yml index d61759e1..39797aa7 100644 --- a/infra/observability/grafana/provisioning/datasources/datasources.yml +++ b/infra/observability/grafana/provisioning/datasources/datasources.yml @@ -36,3 +36,19 @@ datasources: datasourceUid: prometheus nodeGraph: enabled: true + + # Read-only PostgreSQL datasource for the PO Overview dashboard (issue #651). + # Uses the grafana_reader role provisioned by Flyway V68. Traffic stays inside + # archiv-net, so sslmode=disable is the deliberate, accepted setting. + - name: PostgreSQL + type: postgres + uid: postgres + url: archive-db:5432 + user: grafana_reader + editable: false + secureJsonData: + password: ${GRAFANA_DB_PASSWORD} + jsonData: + database: ${POSTGRES_DB} + sslmode: disable + postgresVersion: 1600 -- 2.49.1 From 4e636b32532f59cf79dc3ec0ba65409d33911351 Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 21 May 2026 19:23:09 +0200 Subject: [PATCH 05/13] chore(observability): document GRAFANA_DB_PASSWORD in env files .env.example: declare GRAFANA_DB_PASSWORD with an openssl rand -hex 32 hint so operators replace the placeholder before deploying (NFR-OPS-02). Note that an unset value is not rejected: FlywayConfig logs a warning and falls back to a non-secret default password. obs.env: add a comment explaining that the real value comes from CI's obs-secrets.env, matching the pattern used for other secrets in that file. Refs #651. 
Co-Authored-By: Claude Opus 4.7 --- .env.example | 6 ++++++ infra/observability/obs.env | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/.env.example b/.env.example index 7593d997..08d9154a 100644 --- a/.env.example +++ b/.env.example @@ -39,6 +39,12 @@ PORT_PROMETHEUS=9090 # Grafana admin password — change this before exposing Grafana beyond localhost GRAFANA_ADMIN_PASSWORD=changeme +# Password for the read-only grafana_reader PostgreSQL role used by the PO +# Overview dashboard. Consumed by Flyway V68 (to set the role's password) and +# by Grafana's PostgreSQL datasource (to connect). REQUIRED in production — +# generate with: openssl rand -hex 32 +GRAFANA_DB_PASSWORD=changeme-generate-with-openssl-rand-hex-32 + # GlitchTip domain — production: use https://glitchtip.archiv.raddatz.cloud (must match Caddy vhost) GLITCHTIP_DOMAIN=http://localhost:3002 diff --git a/infra/observability/obs.env b/infra/observability/obs.env index 1c46a8fe..a0632f5b 100644 --- a/infra/observability/obs.env +++ b/infra/observability/obs.env @@ -16,6 +16,11 @@ GLITCHTIP_DOMAIN=https://glitchtip.archiv.raddatz.cloud POSTGRES_USER=archiv +# Note: GRAFANA_DB_PASSWORD is a secret and is injected by CI from +# obs-secrets.env (see .env.example for the local-dev declaration). +# It is consumed by both archive-backend (Flyway V68 placeholder) and +# obs-grafana (PostgreSQL datasource). + # PostgreSQL hostname for GlitchTip db-init and workers. # The actual value depends on the Compose project name — it is not a fixed string. 
# CI sets POSTGRES_HOST in obs-secrets.env per environment: -- 2.49.1 From 637829cebc87b78e6bb843b02bce7a1db0284f00 Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 21 May 2026 19:25:41 +0200 Subject: [PATCH 06/13] feat(observability): add PO Overview Grafana dashboard Provisioned dashboard for the product owner's weekly check-in: system health (Prometheus + Loki), user activity (PostgreSQL audit_log), archive progress (PostgreSQL transcription_blocks + audit_log), and OCR quality (Prometheus ocr-service metrics). Default range 7d, manual refresh, thresholds per the issue spec. Refs #651. Co-Authored-By: Claude Opus 4.7 --- .../provisioning/dashboards/po-overview.json | 702 ++++++++++++++++++ 1 file changed, 702 insertions(+) create mode 100644 infra/observability/grafana/provisioning/dashboards/po-overview.json diff --git a/infra/observability/grafana/provisioning/dashboards/po-overview.json b/infra/observability/grafana/provisioning/dashboards/po-overview.json new file mode 100644 index 00000000..99435c23 --- /dev/null +++ b/infra/observability/grafana/provisioning/dashboards/po-overview.json @@ -0,0 +1,702 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { "type": "grafana", "uid": "grafana" }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Product owner overview — system health, user activity, archive progress, and OCR quality at a weekly glance.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 100, + "title": "System Health", + "type": "row", + "panels": [] + }, + { + "id": 1, + "title": "Backend Status", + "type": "stat", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 4, "w": 6, "x": 0, "y": 1 }, + "targets": [ + { + 
"expr": "up{job=\"spring-boot\"}", + "instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "mappings": [ + { "type": "value", "options": { "0": { "text": "DOWN", "color": "red" } } }, + { "type": "value", "options": { "1": { "text": "UP", "color": "green" } } } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "value" + } + }, + { + "id": 2, + "title": "Server Errors (5xx)", + "type": "stat", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 4, "w": 6, "x": 6, "y": 1 }, + "targets": [ + { + "expr": "sum(increase(http_server_requests_seconds_count{status=~\"5..\"}[$__range]))", + "instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 6 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 3, + "title": "Response Time (p95)", + "type": "stat", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 4, "w": 6, "x": 12, "y": 1 }, + "targets": [ + { + "expr": "histogram_quantile(0.95, sum(rate(http_server_requests_seconds_bucket[$__range])) by (le))", + "instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "s", + "decimals": 2, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.5 }, + { "color": "red", "value": 2 } + 
] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 4, + "title": "Error Log Count", + "type": "stat", + "datasource": { "type": "loki", "uid": "loki" }, + "gridPos": { "h": 4, "w": 6, "x": 18, "y": 1 }, + "targets": [ + { + "expr": "sum(count_over_time({compose_service=\"backend\"} | json | level=\"ERROR\" [$__range]))", + "queryType": "instant", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 10 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 5, + "title": "CPU Usage", + "type": "bargauge", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 5, "w": 8, "x": 0, "y": 5 }, + "targets": [ + { + "expr": "100 - (avg(rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", + "instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "decimals": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 85 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showUnfilled": true + } + }, + { + "id": 6, + "title": "Memory Usage", + "type": "bargauge", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 5, "w": 8, "x": 8, "y": 5 }, + "targets": [ + { + 
"expr": "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100", + "instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "decimals": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 85 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showUnfilled": true + } + }, + { + "id": 7, + "title": "Disk Usage", + "type": "bargauge", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 5, "w": 8, "x": 16, "y": 5 }, + "targets": [ + { + "expr": "(1 - (node_filesystem_avail_bytes{mountpoint=\"/\"} / node_filesystem_size_bytes{mountpoint=\"/\"})) * 100", + "instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "decimals": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 80 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showUnfilled": true + } + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 10 }, + "id": 101, + "title": "User Activity", + "type": "row", + "panels": [] + }, + { + "id": 8, + "title": "Active Users", + "type": "stat", + "datasource": { "type": "postgres", "uid": "postgres" }, + "gridPos": { "h": 4, "w": 8, "x": 0, "y": 11 }, + "targets": [ + { + "rawSql": "SELECT COUNT(DISTINCT actor_id) AS value FROM audit_log WHERE happened_at >= NOW() - INTERVAL '7 days' AND kind = 'LOGIN_SUCCESS'", + "format": 
"table", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "color": { "mode": "fixed", "fixedColor": "blue" } + } + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 9, + "title": "Total Logins", + "type": "stat", + "datasource": { "type": "postgres", "uid": "postgres" }, + "gridPos": { "h": 4, "w": 8, "x": 8, "y": 11 }, + "targets": [ + { + "rawSql": "SELECT COUNT(*) AS value FROM audit_log WHERE happened_at >= NOW() - INTERVAL '7 days' AND kind = 'LOGIN_SUCCESS'", + "format": "table", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "color": { "mode": "fixed", "fixedColor": "blue" } + } + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 10, + "title": "Failed Login Attempts", + "type": "stat", + "datasource": { "type": "postgres", "uid": "postgres" }, + "gridPos": { "h": 4, "w": 8, "x": 16, "y": 11 }, + "targets": [ + { + "rawSql": "SELECT COUNT(*) AS value FROM audit_log WHERE happened_at >= NOW() - INTERVAL '7 days' AND kind IN ('LOGIN_FAILED', 'LOGIN_RATE_LIMITED')", + "format": "table", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 4 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 11, + "title": "Daily Logins (last 7 days)", + "type": "barchart", + "datasource": { "type": "postgres", "uid": "postgres" }, + "gridPos": { "h": 7, "w": 24, "x": 0, "y": 15 }, + 
"targets": [ + { + "rawSql": "SELECT DATE_TRUNC('day', happened_at) AS time, COUNT(*) AS logins FROM audit_log WHERE happened_at >= NOW() - INTERVAL '7 days' AND kind = 'LOGIN_SUCCESS' GROUP BY 1 ORDER BY 1", + "format": "time_series", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "color": { "mode": "fixed", "fixedColor": "blue" } + } + }, + "options": { + "legend": { "displayMode": "hidden" }, + "orientation": "auto", + "showValue": "auto", + "stacking": "none", + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + } + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 22 }, + "id": 102, + "title": "Archive Progress", + "type": "row", + "panels": [] + }, + { + "id": 12, + "title": "Transcription Coverage", + "type": "bargauge", + "datasource": { "type": "postgres", "uid": "postgres" }, + "gridPos": { "h": 5, "w": 24, "x": 0, "y": 23 }, + "targets": [ + { + "rawSql": "SELECT (COUNT(*) FILTER (WHERE text IS NOT NULL AND text <> ''))::float * 100.0 / NULLIF(COUNT(*), 0) AS percent_complete FROM transcription_blocks", + "format": "table", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "decimals": 1, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 25 }, + { "color": "green", "value": 75 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showUnfilled": true + } + }, + { + "id": 13, + "title": "Total Documents", + "type": "stat", + "datasource": { "type": "postgres", "uid": "postgres" }, + "gridPos": { "h": 4, "w": 6, "x": 0, "y": 28 }, + "targets": [ + { + "rawSql": "SELECT COUNT(*) AS value FROM documents WHERE status <> 'PLACEHOLDER'", + "format": "table", + "refId": "A" + } + ], + "fieldConfig": { + 
"defaults": { + "unit": "short", + "decimals": 0, + "color": { "mode": "fixed", "fixedColor": "blue" } + } + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 14, + "title": "Uploads This Week", + "type": "stat", + "datasource": { "type": "postgres", "uid": "postgres" }, + "gridPos": { "h": 4, "w": 6, "x": 6, "y": 28 }, + "targets": [ + { + "rawSql": "SELECT COUNT(*) AS value FROM audit_log WHERE happened_at >= NOW() - INTERVAL '7 days' AND kind = 'FILE_UPLOADED'", + "format": "table", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "color": { "mode": "fixed", "fixedColor": "blue" } + } + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 15, + "title": "Blocks Transcribed This Week", + "type": "stat", + "datasource": { "type": "postgres", "uid": "postgres" }, + "gridPos": { "h": 4, "w": 6, "x": 12, "y": 28 }, + "targets": [ + { + "rawSql": "SELECT COUNT(*) AS value FROM audit_log WHERE happened_at >= NOW() - INTERVAL '7 days' AND kind = 'TEXT_SAVED'", + "format": "table", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "color": { "mode": "fixed", "fixedColor": "blue" } + } + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 16, + "title": "Blocks Reviewed This Week", + "type": "stat", + "datasource": { "type": "postgres", "uid": "postgres" }, + "gridPos": { "h": 4, "w": 6, "x": 18, "y": 28 }, + "targets": [ + { + "rawSql": "SELECT COUNT(*) AS value FROM audit_log WHERE happened_at >= NOW() - INTERVAL '7 days' AND kind = 'BLOCK_REVIEWED'", + "format": "table", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": 
"short", + "decimals": 0, + "color": { "mode": "fixed", "fixedColor": "blue" } + } + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 32 }, + "id": 103, + "title": "OCR Health", + "type": "row", + "panels": [] + }, + { + "id": 17, + "title": "OCR Jobs", + "type": "stat", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 4, "w": 6, "x": 0, "y": 33 }, + "targets": [ + { + "expr": "sum(increase(ocr_jobs_total[$__range]))", + "instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "color": { "mode": "fixed", "fixedColor": "blue" } + } + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 18, + "title": "OCR Page Error Rate", + "type": "stat", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 4, "w": 6, "x": 6, "y": 33 }, + "targets": [ + { + "expr": "sum(increase(ocr_skipped_pages_total[$__range])) / clamp_min(sum(increase(ocr_pages_total[$__range])), 1)", + "instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percentunit", + "decimals": 1, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.01 }, + { "color": "red", "value": 0.05 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 19, + "title": "Illegible Word Rate", + "type": "stat", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 4, "w": 6, "x": 12, "y": 33 }, + "targets": [ + { + "expr": 
"sum(increase(ocr_illegible_words_total[$__range])) / clamp_min(sum(increase(ocr_words_total[$__range])), 1)", + "instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percentunit", + "decimals": 1, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.1 }, + { "color": "red", "value": 0.25 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 20, + "title": "OCR Service Status", + "type": "stat", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 4, "w": 6, "x": 18, "y": 33 }, + "targets": [ + { + "expr": "ocr_models_ready", + "instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "mappings": [ + { "type": "value", "options": { "0": { "text": "NOT READY", "color": "red" } } }, + { "type": "value", "options": { "1": { "text": "READY", "color": "green" } } } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "value" + } + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": ["po-overview", "familienarchiv"], + "templating": { "list": [] }, + "time": { "from": "now-7d", "to": "now" }, + "timepicker": {}, + "timezone": "browser", + "title": "PO Overview", + "uid": "po-overview", + "version": 1, + "weekStart": "" +} -- 2.49.1 From cac00ed71133dce81a8dee54d382de0f05929281 Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 21 May 2026 19:26:34 +0200 Subject: [PATCH 07/13] docs(deployment): document GRAFANA_DB_PASSWORD across env tables Adds 
GRAFANA_DB_PASSWORD to the observability-stack env-var table, the Gitea secrets table, and the obs-secrets.env reference, so operators see the variable wherever they look for related secrets. Refs #651. Co-Authored-By: Claude Opus 4.7 --- docs/DEPLOYMENT.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index 945346ae..28169825 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -152,6 +152,7 @@ All vars are set in `.env` at the repo root (copy from `.env.example`). The back | `PORT_GRAFANA` | Host port for the Grafana UI (bound to `127.0.0.1` only) | `3003` | — | — | | `POSTGRES_HOST` | PostgreSQL hostname for GlitchTip's db-init job and workers. Override when only the staging stack is running and `archive-db` is not resolvable by that name. | `archive-db` | — | — | | `GRAFANA_ADMIN_PASSWORD` | Grafana `admin` user password | `changeme` | YES (prod) | YES | +| `GRAFANA_DB_PASSWORD` | Password for the read-only `grafana_reader` PostgreSQL role used by the PO Overview dashboard (issue #651). Consumed by Flyway V68 and the Grafana PostgreSQL datasource. Generate with `openssl rand -hex 32`. 
| — | YES (prod) | YES | | `PORT_GLITCHTIP` | Host port for the GlitchTip UI (bound to `127.0.0.1` only) | `3002` | — | — | | `GLITCHTIP_DOMAIN` | Public-facing base URL for GlitchTip (used in email links and CORS) | `http://localhost:3002` | YES (prod) | — | | `GLITCHTIP_SECRET_KEY` | Django secret key for GlitchTip — generate with `python3 -c "import secrets; print(secrets.token_hex(32))"` | — | YES | YES | @@ -256,6 +257,7 @@ git.raddatz.cloud A | `MAIL_USERNAME` | release.yml | SMTP user | | `MAIL_PASSWORD` | release.yml | SMTP password | | `GRAFANA_ADMIN_PASSWORD` | both | Grafana `admin` login — generate a strong password | +| `GRAFANA_DB_PASSWORD` | both | Read-only `grafana_reader` role password — `openssl rand -hex 32` | | `GLITCHTIP_SECRET_KEY` | both | Django secret key — `openssl rand -hex 32` | | `SENTRY_DSN` | both | GlitchTip project DSN — set after first-run (§4); leave empty to keep Sentry disabled | | `VITE_SENTRY_DSN` | both | GlitchTip frontend project DSN — set after first-run (§4); leave empty to keep Sentry disabled | @@ -357,6 +359,7 @@ Both files are passed explicitly via `--env-file` to the compose command, so the | Gitea secret | Notes | |---|---| | `GRAFANA_ADMIN_PASSWORD` | Strong unique password; shared by nightly and release | +| `GRAFANA_DB_PASSWORD` | `openssl rand -hex 32`; shared by nightly and release — read-only DB role for the PO Overview dashboard | | `GLITCHTIP_SECRET_KEY` | `openssl rand -hex 32`; shared by nightly and release | | `STAGING_POSTGRES_PASSWORD` / `PROD_POSTGRES_PASSWORD` | Must match the running PostgreSQL container | -- 2.49.1 From a4a3e3b10502996da1cf0dd3cfe0468063be26ff Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 21 May 2026 19:27:06 +0200 Subject: [PATCH 08/13] =?UTF-8?q?docs(architecture):=20show=20Grafana?= =?UTF-8?q?=E2=86=92PostgreSQL=20link=20for=20PO=20Overview=20dashboard?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the new read-only connection 
from Grafana to archive-db (via the grafana_reader role) introduced by the PO Overview dashboard. Refs #651. Co-Authored-By: Claude Opus 4.7 --- docs/architecture/c4/l2-containers.puml | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/architecture/c4/l2-containers.puml b/docs/architecture/c4/l2-containers.puml index 8d66a614..5bfd6799 100644 --- a/docs/architecture/c4/l2-containers.puml +++ b/docs/architecture/c4/l2-containers.puml @@ -48,6 +48,7 @@ Rel(prometheus, ocr, "Scrapes OCR + http_* metrics", "HTTP 8000 /metrics") Rel(grafana, prometheus, "Queries metrics", "HTTP 9090") Rel(grafana, loki, "Queries logs", "HTTP 3100") Rel(grafana, tempo, "Queries traces", "HTTP 3200") +Rel(grafana, db, "Read-only dashboard queries via grafana_reader role", "PostgreSQL / archiv-net") Rel(glitchtip, db, "Stores error events in glitchtip DB", "PostgreSQL / archiv-net") Rel(obs_glitchtip_worker, obs_redis, "Processes Celery tasks", "Redis / obs-net") -- 2.49.1 From bcba4dab80610b94b98581a1c4bbae18c1a73d4b Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 21 May 2026 19:44:19 +0200 Subject: [PATCH 09/13] ci(observability): inject GRAFANA_DB_PASSWORD from Gitea secrets Wires the new GRAFANA_DB_PASSWORD secret through the deploy pipeline: - docker-compose.prod.yml: backend env now passes GRAFANA_DB_PASSWORD through so Flyway V68 can resolve the ${grafanaDbPassword} placeholder in production and staging (it already worked in local dev via docker-compose.yml). - release.yml + nightly.yml: declare GRAFANA_DB_PASSWORD as a required Gitea secret, write it into .env.production / .env.staging (consumed by archive-backend), and into /opt/familienarchiv/obs-secrets.env (consumed by obs-grafana's PostgreSQL datasource). Operator action before the next deploy: add a GRAFANA_DB_PASSWORD value to the Gitea repo secrets (openssl rand -hex 32). Refs #651. 
Co-Authored-By: Claude Opus 4.7 --- .gitea/workflows/nightly.yml | 3 +++ .gitea/workflows/release.yml | 3 +++ docker-compose.prod.yml | 3 +++ 3 files changed, 9 insertions(+) diff --git a/.gitea/workflows/nightly.yml b/.gitea/workflows/nightly.yml index 152050bb..a78637b3 100644 --- a/.gitea/workflows/nightly.yml +++ b/.gitea/workflows/nightly.yml @@ -31,6 +31,7 @@ name: nightly # STAGING_APP_ADMIN_USERNAME # STAGING_APP_ADMIN_PASSWORD # GRAFANA_ADMIN_PASSWORD +# GRAFANA_DB_PASSWORD (read-only grafana_reader DB role, issue #651) # GLITCHTIP_SECRET_KEY # SENTRY_DSN (set after GlitchTip first-run; empty = Sentry disabled) @@ -80,6 +81,7 @@ jobs: POSTGRES_USER=archiv SENTRY_DSN=${{ secrets.SENTRY_DSN }} VITE_SENTRY_DSN=${{ secrets.VITE_SENTRY_DSN }} + GRAFANA_DB_PASSWORD=${{ secrets.GRAFANA_DB_PASSWORD }} EOF - name: Verify backend /import:ro mount is wired @@ -143,6 +145,7 @@ jobs: cp docker-compose.observability.yml /opt/familienarchiv/ cat > /opt/familienarchiv/obs-secrets.env <<'EOF' GRAFANA_ADMIN_PASSWORD=${{ secrets.GRAFANA_ADMIN_PASSWORD }} + GRAFANA_DB_PASSWORD=${{ secrets.GRAFANA_DB_PASSWORD }} GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }} POSTGRES_PASSWORD=${{ secrets.STAGING_POSTGRES_PASSWORD }} POSTGRES_HOST=archiv-staging-db-1 diff --git a/.gitea/workflows/release.yml b/.gitea/workflows/release.yml index cf30bf83..4ad4ab1e 100644 --- a/.gitea/workflows/release.yml +++ b/.gitea/workflows/release.yml @@ -35,6 +35,7 @@ name: release # MAIL_USERNAME # MAIL_PASSWORD # GRAFANA_ADMIN_PASSWORD +# GRAFANA_DB_PASSWORD (read-only grafana_reader DB role, issue #651) # GLITCHTIP_SECRET_KEY # SENTRY_DSN (set after GlitchTip first-run; empty = Sentry disabled) @@ -77,6 +78,7 @@ jobs: IMPORT_HOST_DIR=/srv/familienarchiv-production/import POSTGRES_USER=archiv SENTRY_DSN=${{ secrets.SENTRY_DSN }} + GRAFANA_DB_PASSWORD=${{ secrets.GRAFANA_DB_PASSWORD }} EOF - name: Build images @@ -110,6 +112,7 @@ jobs: cp docker-compose.observability.yml /opt/familienarchiv/ cat 
> /opt/familienarchiv/obs-secrets.env <<'EOF' GRAFANA_ADMIN_PASSWORD=${{ secrets.GRAFANA_ADMIN_PASSWORD }} + GRAFANA_DB_PASSWORD=${{ secrets.GRAFANA_DB_PASSWORD }} GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }} POSTGRES_PASSWORD=${{ secrets.PROD_POSTGRES_PASSWORD }} POSTGRES_HOST=archiv-production-db-1 diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index fe435306..cdae6581 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -227,6 +227,9 @@ services: SPRING_DATASOURCE_URL: jdbc:postgresql://db:5432/archiv SPRING_DATASOURCE_USERNAME: archiv SPRING_DATASOURCE_PASSWORD: ${POSTGRES_PASSWORD} + # Consumed by Flyway V68 via the ${grafanaDbPassword} placeholder to set + # the read-only grafana_reader role's password. + GRAFANA_DB_PASSWORD: ${GRAFANA_DB_PASSWORD} # Application uses the bucket-scoped service account, not MinIO root. S3_ENDPOINT: http://minio:9000 S3_ACCESS_KEY: archiv-app -- 2.49.1 From 3ea7f0b5b26215f38b2474dcd4bbc8584347db0c Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 22 May 2026 17:20:09 +0200 Subject: [PATCH 10/13] feat(observability): fail closed when GRAFANA_DB_PASSWORD is unset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FlywayConfig used to fall back to a hardcoded "changeme-grafana-db-password" string when the env var was missing. That published a known credential for the grafana_reader role (SELECT on audit_log, documents, transcription_blocks) into git history and made silent fail-open the default for any deploy that forgot the secret. Now resolution goes through Spring's Environment and throws IllegalStateException at startup when the value is unset or blank — same shape as UserDataInitializer's refusal to seed default admin creds. Tests inject via the global GRAFANA_DB_PASSWORD entry in test-resources application.properties so existing Flyway-loading test classes keep booting without per-class TestPropertySource boilerplate. 
FlywayConfigTest covers both branches against MockEnvironment without a Spring context. Co-Authored-By: Claude Opus 4.7 --- .../familienarchiv/config/FlywayConfig.java | 22 +++++++---- .../config/FlywayConfigTest.java | 37 +++++++++++++++++++ .../src/test/resources/application.properties | 6 +++ 3 files changed, 58 insertions(+), 7 deletions(-) create mode 100644 backend/src/test/java/org/raddatz/familienarchiv/config/FlywayConfigTest.java diff --git a/backend/src/main/java/org/raddatz/familienarchiv/config/FlywayConfig.java b/backend/src/main/java/org/raddatz/familienarchiv/config/FlywayConfig.java index add9c38c..5358fb56 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/config/FlywayConfig.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/config/FlywayConfig.java @@ -5,6 +5,7 @@ import lombok.extern.slf4j.Slf4j; import org.flywaydb.core.Flyway; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; +import org.springframework.core.env.Environment; import javax.sql.DataSource; import java.util.Map; @@ -14,9 +15,8 @@ import java.util.Map; @Slf4j public class FlywayConfig { - private static final String GRAFANA_DB_PASSWORD_FALLBACK = "changeme-grafana-db-password"; - private final DataSource dataSource; + private final Environment environment; @Bean(name = "flyway") public Flyway flyway() { @@ -33,12 +33,20 @@ public class FlywayConfig { return flyway; } - private String resolveGrafanaDbPassword() { - String value = System.getenv("GRAFANA_DB_PASSWORD"); + // Fail-closed: refuse to boot when GRAFANA_DB_PASSWORD is unset. The + // grafana_reader role's password is (re)set on every boot by + // R__grafana_reader_password.sql, so a missing env var means we'd either + // skip the rotation silently or — with a hardcoded fallback — publish a + // well-known credential for a role with SELECT on audit_log, documents, + // and transcription_blocks. 
Same shape as UserDataInitializer's refusal + // to seed default admin credentials outside dev/test/e2e. + String resolveGrafanaDbPassword() { + String value = environment.getProperty("GRAFANA_DB_PASSWORD"); if (value == null || value.isBlank()) { - log.warn("GRAFANA_DB_PASSWORD is not set; the grafana_reader role will use a non-secret fallback. " - + "Set GRAFANA_DB_PASSWORD in production to enable the Grafana PostgreSQL datasource."); - return GRAFANA_DB_PASSWORD_FALLBACK; + throw new IllegalStateException( + "GRAFANA_DB_PASSWORD is required: it is consumed by " + + "R__grafana_reader_password.sql to (re)set the grafana_reader " + + "role's password on every boot. Generate with: openssl rand -hex 32"); } return value; } diff --git a/backend/src/test/java/org/raddatz/familienarchiv/config/FlywayConfigTest.java b/backend/src/test/java/org/raddatz/familienarchiv/config/FlywayConfigTest.java new file mode 100644 index 00000000..7de3f5cf --- /dev/null +++ b/backend/src/test/java/org/raddatz/familienarchiv/config/FlywayConfigTest.java @@ -0,0 +1,37 @@ +package org.raddatz.familienarchiv.config; + +import org.junit.jupiter.api.Test; +import org.springframework.mock.env.MockEnvironment; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +class FlywayConfigTest { + + @Test + void resolveGrafanaDbPassword_throws_when_env_unset() { + FlywayConfig config = new FlywayConfig(null, new MockEnvironment()); + + assertThatThrownBy(config::resolveGrafanaDbPassword) + .isInstanceOf(IllegalStateException.class) + .hasMessageContaining("GRAFANA_DB_PASSWORD is required"); + } + + @Test + void resolveGrafanaDbPassword_throws_when_env_blank() { + MockEnvironment env = new MockEnvironment().withProperty("GRAFANA_DB_PASSWORD", " "); + FlywayConfig config = new FlywayConfig(null, env); + + assertThatThrownBy(config::resolveGrafanaDbPassword) + .isInstanceOf(IllegalStateException.class) + 
.hasMessageContaining("GRAFANA_DB_PASSWORD is required"); + } + + @Test + void resolveGrafanaDbPassword_returns_value_when_env_set() { + MockEnvironment env = new MockEnvironment().withProperty("GRAFANA_DB_PASSWORD", "abc"); + FlywayConfig config = new FlywayConfig(null, env); + + assertThat(config.resolveGrafanaDbPassword()).isEqualTo("abc"); + } +} diff --git a/backend/src/test/resources/application.properties b/backend/src/test/resources/application.properties index a6b847d2..c6b266c1 100644 --- a/backend/src/test/resources/application.properties +++ b/backend/src/test/resources/application.properties @@ -1,2 +1,8 @@ logging.level.root=WARN logging.level.org.raddatz=INFO + +# Default test value so FlywayConfig's fail-closed check passes without each +# test having to set GRAFANA_DB_PASSWORD explicitly. The actual value is +# irrelevant in tests — Flyway only uses it to set the grafana_reader role's +# password, which no test connects with. +GRAFANA_DB_PASSWORD=test-grafana-reader-password -- 2.49.1 From c282f38170cd55933bf3a711807915ad3dd4b62e Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 22 May 2026 17:20:35 +0200 Subject: [PATCH 11/13] feat(observability): own grafana_reader password via repeatable migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit V68 used to set the role's password in a versioned migration, which Flyway applies exactly once per database. Rotating GRAFANA_DB_PASSWORD therefore had no effect on the DB role — operators would need a manual ALTER ROLE or a `flyway repair` that nobody documented. The shape conflated two lifecycles: schema migration (one-shot, immutable) and credential provisioning (rotatable). Split into: - V68 (versioned, immutable): creates the role and applies SELECT grants on audit_log, documents, transcription_blocks. - R__grafana_reader_password.sql (repeatable): issues ALTER ROLE … PASSWORD with the placeholder. 
Flyway computes the checksum on the resolved content, so any change to GRAFANA_DB_PASSWORD changes the checksum and re-applies the migration on the next boot. Rotation becomes "bump env var + restart backend". Co-Authored-By: Claude Opus 4.7 --- .../db/migration/R__grafana_reader_password.sql | 14 ++++++++++++++ .../db/migration/V68__add_grafana_reader_role.sql | 12 ++++++------ 2 files changed, 20 insertions(+), 6 deletions(-) create mode 100644 backend/src/main/resources/db/migration/R__grafana_reader_password.sql diff --git a/backend/src/main/resources/db/migration/R__grafana_reader_password.sql b/backend/src/main/resources/db/migration/R__grafana_reader_password.sql new file mode 100644 index 00000000..a4e63037 --- /dev/null +++ b/backend/src/main/resources/db/migration/R__grafana_reader_password.sql @@ -0,0 +1,14 @@ +-- Repeatable migration: sets the grafana_reader role's password from the +-- ${grafanaDbPassword} placeholder (resolved by FlywayConfig from the +-- GRAFANA_DB_PASSWORD environment variable). Flyway computes the checksum on +-- the resolved migration content, so any change to GRAFANA_DB_PASSWORD changes +-- the checksum and re-applies this migration on the next boot. That makes +-- password rotation a "change env var + restart" operation — no manual psql. +-- +-- V68 created the role itself (without a usable password). This file owns the +-- password lifecycle; nothing else writes it. 
+DO $$ +BEGIN + EXECUTE format('ALTER ROLE grafana_reader WITH PASSWORD %L', '${grafanaDbPassword}'); +END +$$; diff --git a/backend/src/main/resources/db/migration/V68__add_grafana_reader_role.sql b/backend/src/main/resources/db/migration/V68__add_grafana_reader_role.sql index ffb185fa..eb276b77 100644 --- a/backend/src/main/resources/db/migration/V68__add_grafana_reader_role.sql +++ b/backend/src/main/resources/db/migration/V68__add_grafana_reader_role.sql @@ -1,13 +1,13 @@ -- Read-only role used by the Grafana PostgreSQL datasource for the PO Overview --- dashboard (issue #651). Password is injected at migration time via the Flyway --- placeholder ${grafanaDbPassword}, supplied by FlywayConfig from the --- GRAFANA_DB_PASSWORD environment variable. +-- dashboard (issue #651). The role is created here without a usable password +-- (LOGIN-capable but no password set); R__grafana_reader_password.sql sets the +-- password from GRAFANA_DB_PASSWORD on every boot, so rotation is just "bump +-- the env var and restart the backend" — see docs/adr/024-* and the rotation +-- runbook in docs/DEPLOYMENT.md. DO $$ BEGIN IF NOT EXISTS (SELECT 1 FROM pg_catalog.pg_roles WHERE rolname = 'grafana_reader') THEN - EXECUTE format('CREATE ROLE grafana_reader WITH LOGIN PASSWORD %L', '${grafanaDbPassword}'); - ELSE - EXECUTE format('ALTER ROLE grafana_reader WITH LOGIN PASSWORD %L', '${grafanaDbPassword}'); + CREATE ROLE grafana_reader WITH LOGIN; END IF; END $$; -- 2.49.1 From 769984608bf11dd6995b707c825dfaffd535c8ec Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 22 May 2026 17:21:01 +0200 Subject: [PATCH 12/13] test(observability): expand grafana_reader coverage with write-deny + PII negatives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original 4 tests asserted SELECT existed on the three granted tables and was absent on app_users. 
That left two gaps a future migration could slip through silently: - INSERT/UPDATE/DELETE on the granted tables — if someone GRANTed write access on, say, documents to grafana_reader, the SELECT positives stay green and the boundary is breached invisibly. - Other PII / sensitive tables — the single app_users negative checks one table; a wildcard "GRANT SELECT ON ALL TABLES IN SCHEMA public" would still leave it green by accident if app_users wasn't the only sensitive table. Switch to a hasPrivilege(table, privilege) helper, add three write-deny tests (INSERT/UPDATE/DELETE on each granted table), and replace the single app_users negative with a parameterized sweep over app_users, user_groups, persons, notifications, document_comments, document_annotations, geschichten. New sensitive tables get added to that list as they appear. Co-Authored-By: Claude Opus 4.7 --- .../GrafanaReaderRoleIntegrationTest.java | 58 ++++++++++++++++--- 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/backend/src/test/java/org/raddatz/familienarchiv/config/GrafanaReaderRoleIntegrationTest.java b/backend/src/test/java/org/raddatz/familienarchiv/config/GrafanaReaderRoleIntegrationTest.java index f930b096..d7c3ccec 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/config/GrafanaReaderRoleIntegrationTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/config/GrafanaReaderRoleIntegrationTest.java @@ -1,6 +1,8 @@ package org.raddatz.familienarchiv.config; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import org.raddatz.familienarchiv.PostgresContainerConfig; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.data.jpa.test.autoconfigure.DataJpaTest; @@ -10,6 +12,9 @@ import org.springframework.jdbc.core.JdbcTemplate; import static org.assertj.core.api.Assertions.assertThat; +// GRAFANA_DB_PASSWORD is supplied via the global 
test default in +// src/test/resources/application.properties — FlywayConfig fails closed +// when it is unset, so all tests that load the migration path need it. @DataJpaTest @AutoConfigureTestDatabase(replace = AutoConfigureTestDatabase.Replace.NONE) @Import({PostgresContainerConfig.class, FlywayConfig.class}) @@ -17,31 +22,68 @@ class GrafanaReaderRoleIntegrationTest { @Autowired JdbcTemplate jdbc; + // --- positive grants (SELECT on the three explicitly granted tables) --- + @Test void grafana_reader_has_select_on_audit_log() { - assertThat(hasSelect("audit_log")).isTrue(); + assertThat(hasPrivilege("audit_log", "SELECT")).isTrue(); } @Test void grafana_reader_has_select_on_documents() { - assertThat(hasSelect("documents")).isTrue(); + assertThat(hasPrivilege("documents", "SELECT")).isTrue(); } @Test void grafana_reader_has_select_on_transcription_blocks() { - assertThat(hasSelect("transcription_blocks")).isTrue(); + assertThat(hasPrivilege("transcription_blocks", "SELECT")).isTrue(); + } + + // --- write-deny on the granted tables: SELECT-only means SELECT-only. + // A future migration that GRANTs INSERT/UPDATE/DELETE on any of these + // would fail these tests, even though the original positive grants still + // pass. Locks the boundary in both directions. + + @Test + void grafana_reader_has_no_INSERT_on_documents() { + assertThat(hasPrivilege("documents", "INSERT")).isFalse(); } @Test - void grafana_reader_has_no_select_on_app_users() { - assertThat(hasSelect("app_users")).isFalse(); + void grafana_reader_has_no_UPDATE_on_audit_log() { + assertThat(hasPrivilege("audit_log", "UPDATE")).isFalse(); } - private boolean hasSelect(String table) { + @Test + void grafana_reader_has_no_DELETE_on_transcription_blocks() { + assertThat(hasPrivilege("transcription_blocks", "DELETE")).isFalse(); + } + + // --- negative grants: PII / sensitive tables MUST NOT be readable. 
+ // The parameterized form catches the "someone widened the grant to + // ALL TABLES IN SCHEMA public" footgun — three specific positive grants + // would still pass while this sweep turns red. + + @ParameterizedTest + @ValueSource(strings = { + "app_users", + "user_groups", + "persons", + "notifications", + "document_comments", + "document_annotations", + "geschichten" + }) + void grafana_reader_has_no_SELECT_on_protected_table(String table) { + assertThat(hasPrivilege(table, "SELECT")).isFalse(); + } + + private boolean hasPrivilege(String table, String privilege) { Boolean result = jdbc.queryForObject( - "SELECT has_table_privilege('grafana_reader', ?, 'SELECT')", + "SELECT has_table_privilege('grafana_reader', ?, ?)", Boolean.class, - table); + table, + privilege); return Boolean.TRUE.equals(result); } } -- 2.49.1 From 1109ab917bc30aa7c59486e8463c1c9e42c87962 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 22 May 2026 17:21:27 +0200 Subject: [PATCH 13/13] docs(observability): ADR-024 + rotation runbook for grafana_reader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ADR-024 records the deliberate cross-domain link (obs-grafana joins archiv-net to query archive-db via the SELECT-only grafana_reader role), the rejected alternatives (Prometheus exporter, read replica, versioned migration + flyway repair, hardcoded fallback), and the consequences — specifically that a Grafana compromise gains TCP reach to archive-db but is bounded by the role's least-privilege grants. The DEPLOYMENT.md runbook documents the rotation procedure that R__grafana_reader_password.sql now enables: bump GRAFANA_DB_PASSWORD, restart backend (Flyway re-applies because the resolved checksum changed), restart obs-grafana (datasource picks up the new env var). Also calls out the fail-closed startup behavior so operators who hit IllegalStateException know it is deliberate. 
Co-Authored-By: Claude Opus 4.7 --- docs/DEPLOYMENT.md | 25 ++++ ...na-reads-archive-db-via-bridged-network.md | 123 ++++++++++++++++++ 2 files changed, 148 insertions(+) create mode 100644 docs/adr/024-grafana-reads-archive-db-via-bridged-network.md diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index 28169825..c6560a0a 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -430,6 +430,31 @@ docker exec obs-loki wget -qO- \ Prometheus port `9090` and Grafana port `3003` (default; configurable via `PORT_GRAFANA`) are bound to `127.0.0.1` on the host. No other observability ports are host-bound. +##### Rotate the `grafana_reader` DB password + +The PO Overview dashboard reads `audit_log`, `documents`, and `transcription_blocks` through the SELECT-only `grafana_reader` PostgreSQL role (issue #651, ADR-024). The role's password is owned by `R__grafana_reader_password.sql` — a Flyway *repeatable* migration that re-runs whenever the resolved `${grafanaDbPassword}` placeholder changes. That makes rotation a two-restart operation, no manual `psql` required. + +```bash +# 1. Generate a new value +openssl rand -hex 32 + +# 2. Update both sides: +# - Gitea secret GRAFANA_DB_PASSWORD (nightly + release workflows pick it up) +# - Local .env on the server / dev machine + +# 3. Recreate the backend container (a plain `docker compose restart` keeps the old env). Flyway sees that R__'s resolved checksum changed and +# re-applies it, issuing ALTER ROLE grafana_reader WITH PASSWORD '<new value>'. +docker compose up -d --force-recreate --no-deps backend + +# 4. Recreate obs-grafana so the provisioned datasource picks up the new env value. +docker compose -f docker-compose.observability.yml up -d --force-recreate --no-deps obs-grafana + +# 5. Verify the dashboard loads — PO Overview's Postgres panels should populate +# instead of "Data source error". +``` + +If `GRAFANA_DB_PASSWORD` is unset or blank, the backend **refuses to start** (`IllegalStateException`). That is deliberate — see `FlywayConfig.resolveGrafanaDbPassword()` and the rationale in ADR-024. 
+ #### GlitchTip | Item | Value | diff --git a/docs/adr/024-grafana-reads-archive-db-via-bridged-network.md b/docs/adr/024-grafana-reads-archive-db-via-bridged-network.md new file mode 100644 index 00000000..da47cdf3 --- /dev/null +++ b/docs/adr/024-grafana-reads-archive-db-via-bridged-network.md @@ -0,0 +1,123 @@ +# ADR-024: Grafana reads archive-db via a bridged network and a SELECT-only role + +## Status + +Accepted + +## Context + +Issue #651 (the PO Overview Grafana dashboard) needs aggregates over three +tables in the main application database — `audit_log`, `documents`, and +`transcription_blocks` — to answer the operator's four weekly questions: is +everything working, are people using it, is the archive making progress, is +OCR working well. + +Until now, `obs-grafana` and the rest of the observability stack lived on +their own Docker network (`obs-net`) and never touched `archiv-net`, where +`archive-db` runs. The two were intentionally isolated: a compromise of any +observability container could not pivot to the application database. + +The PO Overview's archive-progress and user-activity panels need rolling +7-day SQL aggregates that cannot be served by Prometheus or Loki. That +forces a connection from `obs-grafana` to `archive-db` for the first time. + +Two implementation requirements shaped the design: + +1. **Least privilege on the database side.** The Spring Boot application + role (`archiv`) has full read/write on every table. Letting Grafana + connect with that role would mean a Grafana compromise becomes an + application compromise. The dashboard only needs SELECT on three + tables; the role must reflect that and nothing more. + +2. **Operational simplicity of secret rotation.** The role's password is + shared between the migration that sets it and the Grafana datasource + that uses it. 
A first version of this work put the password in a + versioned Flyway migration (V68), which Flyway only applies once — + leaving rotation as an out-of-band `psql ALTER ROLE` step that no + runbook documented. The shape must support rotation without manual + SQL. + +## Decision + +- Provision a dedicated PostgreSQL role `grafana_reader` with `LOGIN` plus + `GRANT SELECT` on `audit_log`, `documents`, `transcription_blocks` only. + No INSERT/UPDATE/DELETE on any table, no access to any other table — + enforced by the database, locked in by both positive and parameterized + negative tests in `GrafanaReaderRoleIntegrationTest`. +- Split the role's lifecycle across two migrations: + - `V68__add_grafana_reader_role.sql` — versioned, immutable, idempotent. + Creates the role and applies the grants. Runs exactly once per + database, like every other versioned migration. + - `R__grafana_reader_password.sql` — Flyway *repeatable* migration that + issues `ALTER ROLE grafana_reader WITH PASSWORD '${grafanaDbPassword}'`. + Flyway computes the checksum on the resolved content, so any change + to `GRAFANA_DB_PASSWORD` flips the checksum and re-applies the + migration on the next boot. Rotation becomes "bump env var, restart + backend, restart obs-grafana" — see the runbook in + `docs/DEPLOYMENT.md §4 → Rotate the grafana_reader DB password`. +- Resolve the password through Spring's `Environment` rather than a raw + `System.getenv()` call, so tests inject via `application.properties` + and the resolver is unit-testable with `MockEnvironment`. Fail closed + with `IllegalStateException` when the variable is unset — no fallback + string. Same shape as `UserDataInitializer`'s refusal to seed default + admin credentials outside dev/test/e2e. +- Join `obs-grafana` to `archiv-net` in addition to `obs-net`. Only the + Grafana container crosses the boundary; Loki, Tempo, Prometheus, + GlitchTip, and the worker containers remain `obs-net`-only. 
+ +## Consequences + +**Positive** + +- Database-level least privilege: a Grafana compromise gains SELECT on + three tables. It cannot write, and it cannot read PII tables like `app_users`, + `persons`, `notifications`, `document_comments`, `geschichten`. The + parameterized PII negative sweep in `GrafanaReaderRoleIntegrationTest` + is the regression gate; new sensitive tables get added to that list. +- Rotation is documented, idempotent, and survives operator turnover. + No "the password set on day 1 is the password forever" failure mode. +- Tests pin down both sides of the boundary: positive grants must hold, + write-deny must hold, and every table in the PII negative sweep must stay unreadable. + +**Negative / trade-offs** + +- `obs-net` is no longer fully isolated from `archiv-net`. A Grafana RCE + (e.g. via a future Grafana CVE) gains a TCP path to `archive-db` — + a pivot to the database is now contained rather than impossible. The least-privilege role is the + mitigation; we accept that mitigation as sufficient for a single + bridged container. +- The backend must hold `GRAFANA_DB_PASSWORD` in its environment forever, + so Flyway can resolve the placeholder on every boot. A backend RCE + therefore also leaks the Grafana datasource password. Acceptable + because that password's blast radius is itself bounded by the + least-privilege grants on `grafana_reader`. + +## Alternatives considered + +- **Prometheus PostgreSQL exporter, no direct connection.** Loses ad-hoc + SQL aggregates — the dashboard would need every metric pre-defined as + an exporter query, with a redeploy to add a new one. The PO Overview + is the type of dashboard that grows panels over time; pre-defining + every aggregate is the wrong shape. +- **Read replica or logical-replication slot dedicated to Grafana.** + Real operational cost (extra Postgres instance, replication monitoring, + storage doubled) disproportionate to a weekly PO glance. 
+- **Versioned migration with `flyway repair` for rotation.** Rejected: + conflates schema lifecycle with credential lifecycle, requires manual + intervention to rotate, and the repair command's semantics are + surprising to operators unfamiliar with Flyway internals. +- **Hardcoded fallback password when env var is unset.** Rejected as a + security blocker: publishes a known credential for a role with read + access to user activity and full letter text. The fail-closed + behavior is the explicit defense. + +## References + +- Issue #651 — PO Overview Grafana dashboard +- `backend/src/main/resources/db/migration/V68__add_grafana_reader_role.sql` +- `backend/src/main/resources/db/migration/R__grafana_reader_password.sql` +- `backend/src/main/java/org/raddatz/familienarchiv/config/FlywayConfig.java` +- `backend/src/test/java/org/raddatz/familienarchiv/config/GrafanaReaderRoleIntegrationTest.java` +- `infra/observability/grafana/provisioning/datasources/datasources.yml` +- `docker-compose.observability.yml` — `archiv-net` bridge on `obs-grafana` +- `docs/DEPLOYMENT.md §4` — rotation runbook -- 2.49.1