diff --git a/infra/observability/grafana/provisioning/dashboards/po-overview.json b/infra/observability/grafana/provisioning/dashboards/po-overview.json new file mode 100644 index 00000000..99435c23 --- /dev/null +++ b/infra/observability/grafana/provisioning/dashboards/po-overview.json @@ -0,0 +1,702 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { "type": "grafana", "uid": "grafana" }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Product owner overview — system health, user activity, archive progress, and OCR quality at a weekly glance.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 100, + "title": "System Health", + "type": "row", + "panels": [] + }, + { + "id": 1, + "title": "Backend Status", + "type": "stat", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 4, "w": 6, "x": 0, "y": 1 }, + "targets": [ + { + "expr": "up{job=\"spring-boot\"}", + "instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "mappings": [ + { "type": "value", "options": { "0": { "text": "DOWN", "color": "red" } } }, + { "type": "value", "options": { "1": { "text": "UP", "color": "green" } } } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "value" + } + }, + { + "id": 2, + "title": "Server Errors (5xx)", + "type": "stat", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 4, "w": 6, "x": 6, "y": 1 }, + 
"targets": [ + { + "expr": "sum(increase(http_server_requests_seconds_count{status=~\"5..\"}[$__range]))", + "instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 6 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 3, + "title": "Response Time (p95)", + "type": "stat", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 4, "w": 6, "x": 12, "y": 1 }, + "targets": [ + { + "expr": "histogram_quantile(0.95, sum(rate(http_server_requests_seconds_bucket[$__range])) by (le))", + "instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "s", + "decimals": 2, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.5 }, + { "color": "red", "value": 2 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 4, + "title": "Error Log Count", + "type": "stat", + "datasource": { "type": "loki", "uid": "loki" }, + "gridPos": { "h": 4, "w": 6, "x": 18, "y": 1 }, + "targets": [ + { + "expr": "sum(count_over_time({compose_service=\"backend\"} | json | level=\"ERROR\" [$__range]))", + "queryType": "instant", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 10 } + ] + }, + "color": { "mode": "thresholds" } + } + }, 
+ "options": { + "colorMode": "background", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 5, + "title": "CPU Usage", + "type": "bargauge", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 5, "w": 8, "x": 0, "y": 5 }, + "targets": [ + { + "expr": "100 - (avg(rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", + "instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "decimals": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 85 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showUnfilled": true + } + }, + { + "id": 6, + "title": "Memory Usage", + "type": "bargauge", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 5, "w": 8, "x": 8, "y": 5 }, + "targets": [ + { + "expr": "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100", + "instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "decimals": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 85 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showUnfilled": true + } + }, + { + "id": 7, + "title": "Disk Usage", + "type": "bargauge", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 5, "w": 8, "x": 16, "y": 5 }, + "targets": [ + { + 
"expr": "(1 - (node_filesystem_avail_bytes{mountpoint=\"/\"} / node_filesystem_size_bytes{mountpoint=\"/\"})) * 100", + "instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "decimals": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 70 }, + { "color": "red", "value": 80 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showUnfilled": true + } + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 10 }, + "id": 101, + "title": "User Activity", + "type": "row", + "panels": [] + }, + { + "id": 8, + "title": "Active Users (last 7 days)", + "type": "stat", + "datasource": { "type": "postgres", "uid": "postgres" }, + "gridPos": { "h": 4, "w": 8, "x": 0, "y": 11 }, + "targets": [ + { + "rawSql": "SELECT COUNT(DISTINCT actor_id) AS value FROM audit_log WHERE happened_at >= NOW() - INTERVAL '7 days' AND kind = 'LOGIN_SUCCESS'", + "format": "table", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "color": { "mode": "fixed", "fixedColor": "blue" } + } + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 9, + "title": "Total Logins (last 7 days)", + "type": "stat", + "datasource": { "type": "postgres", "uid": "postgres" }, + "gridPos": { "h": 4, "w": 8, "x": 8, "y": 11 }, + "targets": [ + { + "rawSql": "SELECT COUNT(*) AS value FROM audit_log WHERE happened_at >= NOW() - INTERVAL '7 days' AND kind = 'LOGIN_SUCCESS'", + "format": "table", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "color": { "mode": "fixed", "fixedColor": "blue" } + } + }, + "options": { + 
"colorMode": "value", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 10, + "title": "Failed Login Attempts (last 7 days)", + "type": "stat", + "datasource": { "type": "postgres", "uid": "postgres" }, + "gridPos": { "h": 4, "w": 8, "x": 16, "y": 11 }, + "targets": [ + { + "rawSql": "SELECT COUNT(*) AS value FROM audit_log WHERE happened_at >= NOW() - INTERVAL '7 days' AND kind IN ('LOGIN_FAILED', 'LOGIN_RATE_LIMITED')", + "format": "table", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 1 }, + { "color": "red", "value": 4 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 11, + "title": "Daily Logins (last 7 days)", + "type": "barchart", + "datasource": { "type": "postgres", "uid": "postgres" }, + "gridPos": { "h": 7, "w": 24, "x": 0, "y": 15 }, + "targets": [ + { + "rawSql": "SELECT DATE_TRUNC('day', happened_at) AS time, COUNT(*) AS logins FROM audit_log WHERE happened_at >= NOW() - INTERVAL '7 days' AND kind = 'LOGIN_SUCCESS' GROUP BY 1 ORDER BY 1", + "format": "time_series", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "color": { "mode": "fixed", "fixedColor": "blue" } + } + }, + "options": { + "legend": { "displayMode": "hidden" }, + "orientation": "auto", + "showValue": "auto", + "stacking": "none", + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + } + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 22 }, + "id": 102, + "title": "Archive Progress", + "type": "row", + "panels": [] + }, + { + "id": 12, + "title": "Transcription Coverage", + "type": "bargauge", + "datasource": { 
"type": "postgres", "uid": "postgres" }, + "gridPos": { "h": 5, "w": 24, "x": 0, "y": 23 }, + "targets": [ + { + "rawSql": "SELECT (COUNT(*) FILTER (WHERE text IS NOT NULL AND text <> ''))::float * 100.0 / NULLIF(COUNT(*), 0) AS percent_complete FROM transcription_blocks", + "format": "table", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "decimals": 1, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "yellow", "value": 25 }, + { "color": "green", "value": 75 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "showUnfilled": true + } + }, + { + "id": 13, + "title": "Total Documents", + "type": "stat", + "datasource": { "type": "postgres", "uid": "postgres" }, + "gridPos": { "h": 4, "w": 6, "x": 0, "y": 28 }, + "targets": [ + { + "rawSql": "SELECT COUNT(*) AS value FROM documents WHERE status <> 'PLACEHOLDER'", + "format": "table", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "color": { "mode": "fixed", "fixedColor": "blue" } + } + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 14, + "title": "Uploads This Week", + "type": "stat", + "datasource": { "type": "postgres", "uid": "postgres" }, + "gridPos": { "h": 4, "w": 6, "x": 6, "y": 28 }, + "targets": [ + { + "rawSql": "SELECT COUNT(*) AS value FROM audit_log WHERE happened_at >= NOW() - INTERVAL '7 days' AND kind = 'FILE_UPLOADED'", + "format": "table", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "color": { "mode": "fixed", "fixedColor": "blue" } + } + }, + "options": { + "colorMode": "value", + "graphMode": "none", + 
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 15, + "title": "Blocks Transcribed This Week", + "type": "stat", + "datasource": { "type": "postgres", "uid": "postgres" }, + "gridPos": { "h": 4, "w": 6, "x": 12, "y": 28 }, + "targets": [ + { + "rawSql": "SELECT COUNT(*) AS value FROM audit_log WHERE happened_at >= NOW() - INTERVAL '7 days' AND kind = 'TEXT_SAVED'", + "format": "table", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "color": { "mode": "fixed", "fixedColor": "blue" } + } + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 16, + "title": "Blocks Reviewed This Week", + "type": "stat", + "datasource": { "type": "postgres", "uid": "postgres" }, + "gridPos": { "h": 4, "w": 6, "x": 18, "y": 28 }, + "targets": [ + { + "rawSql": "SELECT COUNT(*) AS value FROM audit_log WHERE happened_at >= NOW() - INTERVAL '7 days' AND kind = 'BLOCK_REVIEWED'", + "format": "table", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "color": { "mode": "fixed", "fixedColor": "blue" } + } + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 32 }, + "id": 103, + "title": "OCR Health", + "type": "row", + "panels": [] + }, + { + "id": 17, + "title": "OCR Jobs", + "type": "stat", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 4, "w": 6, "x": 0, "y": 33 }, + "targets": [ + { + "expr": "sum(increase(ocr_jobs_total[$__range]))", + "instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "decimals": 0, + "color": { "mode": "fixed", "fixedColor": "blue" } + } + }, + "options": { + 
"colorMode": "value", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 18, + "title": "OCR Page Error Rate", + "type": "stat", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 4, "w": 6, "x": 6, "y": 33 }, + "targets": [ + { + "expr": "sum(increase(ocr_skipped_pages_total[$__range])) / clamp_min(sum(increase(ocr_pages_total[$__range])), 1)", + "instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percentunit", + "decimals": 1, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.01 }, + { "color": "red", "value": 0.05 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 19, + "title": "Illegible Word Rate", + "type": "stat", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 4, "w": 6, "x": 12, "y": 33 }, + "targets": [ + { + "expr": "sum(increase(ocr_illegible_words_total[$__range])) / clamp_min(sum(increase(ocr_words_total[$__range])), 1)", + "instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percentunit", + "decimals": 1, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 0.1 }, + { "color": "red", "value": 0.25 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } + } + }, + { + "id": 20, + "title": "OCR Service Status", + "type": "stat", + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 4, "w": 6, "x": 18, "y": 33 }, + "targets": [ + { + "expr": "ocr_models_ready", + 
"instant": true, + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "mappings": [ + { "type": "value", "options": { "0": { "text": "NOT READY", "color": "red" } } }, + { "type": "value", "options": { "1": { "text": "READY", "color": "green" } } } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + }, + "color": { "mode": "thresholds" } + } + }, + "options": { + "colorMode": "background", + "graphMode": "none", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "value" + } + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": ["po-overview", "familienarchiv"], + "templating": { "list": [] }, + "time": { "from": "now-7d", "to": "now" }, + "timepicker": {}, + "timezone": "browser", + "title": "PO Overview", + "uid": "po-overview", + "version": 1, + "weekStart": "" +}