From 1d42be98826fcc07be4811a44f2cf22d437e6211 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 15 May 2026 01:23:03 +0200 Subject: [PATCH 1/2] devops(observability): scaffold docker-compose.observability.yml and infra/observability/ structure Creates the skeleton observability stack (no running services yet) that all subsequent Grafana LGTM + GlitchTip issues depend on: - docker-compose.observability.yml: external archiv-net join, obs-net bridge, named volumes for all five services, placeholder comments for each service group (Metrics/Logs/Traces/Dashboards/Error Tracking), startup-order note - infra/observability/{prometheus,loki,promtail,tempo,grafana/provisioning/{datasources,dashboards}}/.gitkeep - .env.example: new # --- Observability --- section with PORT_GRAFANA, PORT_GLITCHTIP, PORT_PROMETHEUS, GLITCHTIP_DOMAIN, GLITCHTIP_SECRET_KEY (with generation hint), SENTRY_DSN, VITE_SENTRY_DSN Verified: docker compose -f docker-compose.observability.yml config exits 0 Co-Authored-By: Claude Sonnet 4.6 --- .env.example | 24 ++++++++++ docker-compose.observability.yml | 48 +++++++++++++++++++ .../grafana/provisioning/dashboards/.gitkeep | 0 .../grafana/provisioning/datasources/.gitkeep | 0 infra/observability/loki/.gitkeep | 0 infra/observability/prometheus/.gitkeep | 0 infra/observability/promtail/.gitkeep | 0 infra/observability/tempo/.gitkeep | 0 8 files changed, 72 insertions(+) create mode 100644 docker-compose.observability.yml create mode 100644 infra/observability/grafana/provisioning/dashboards/.gitkeep create mode 100644 infra/observability/grafana/provisioning/datasources/.gitkeep create mode 100644 infra/observability/loki/.gitkeep create mode 100644 infra/observability/prometheus/.gitkeep create mode 100644 infra/observability/promtail/.gitkeep create mode 100644 infra/observability/tempo/.gitkeep diff --git a/.env.example b/.env.example index 5b928337..3d8ebb00 100644 --- a/.env.example +++ b/.env.example @@ -26,6 +26,30 @@ PORT_MAILPIT_SMTP=1025 # 
Generate with: python3 -c "import secrets; print(secrets.token_hex(32))" OCR_TRAINING_TOKEN=change-me-in-production +# --- Observability --- +# Optional stack — start with: docker compose -f docker-compose.observability.yml up -d +# Requires the main stack to already be running (docker compose up -d creates archiv-net). + +# Ports for host access +PORT_GRAFANA=3001 +PORT_GLITCHTIP=3002 +PORT_PROMETHEUS=9090 + +# GlitchTip domain — production: use https://grafana.raddatz.cloud (must match Caddy vhost) +GLITCHTIP_DOMAIN=http://localhost:3002 + +# GlitchTip secret key — Django SECRET_KEY equivalent, used to sign sessions and tokens. +# REQUIRED in production — must not be empty or 'changeme'. Fail-closed: GlitchTip will +# refuse to start with an invalid key. +# Generate with: python3 -c "import secrets; print(secrets.token_hex(50))" +GLITCHTIP_SECRET_KEY=changeme-generate-a-real-secret + +# Error reporting DSNs — leave empty to disable the SDK (safe default). +# SENTRY_DSN: backend (Spring Boot) — used by the GlitchTip/Sentry Java SDK +SENTRY_DSN= +# VITE_SENTRY_DSN: frontend (SvelteKit) — injected at build time via Vite +VITE_SENTRY_DSN= + # Production SMTP — uncomment and fill in to send real emails instead of catching them # APP_BASE_URL=https://your-domain.example.com # MAIL_HOST=smtp.example.com diff --git a/docker-compose.observability.yml b/docker-compose.observability.yml new file mode 100644 index 00000000..461f2905 --- /dev/null +++ b/docker-compose.observability.yml @@ -0,0 +1,48 @@ +# Observability stack — Grafana LGTM + GlitchTip +# +# Requires the main stack to be running first: +# docker compose up -d # creates archiv-net +# docker compose -f docker-compose.observability.yml up -d +# +# To validate without starting: +# docker compose -f docker-compose.observability.yml config + +# No services defined yet — added in subsequent issues: +# +# --- Metrics: Prometheus --- +# prometheus: (see issue #573) +# +# --- Logs: Loki + Promtail --- +# loki: (see 
issue #574) +# promtail: (see issue #575) +# +# --- Traces: Tempo --- +# tempo: (see future issue) +# +# --- Dashboards: Grafana --- +# grafana: (see future issue) +# +# --- Error Tracking: GlitchTip --- +# glitchtip: (see future issue) +services: {} + +networks: + # Shared network created by the main docker-compose.yml. + # The observability stack joins as a peer so Prometheus can scrape + # archive-backend by container name. The observability stack must NOT + # attempt to create this network — it will fail with a clear error if + # the main stack is not running yet. + archiv-net: + external: true + + # Internal network for observability-service-to-service traffic + # (e.g. Grafana → Prometheus, Grafana → Loki, Grafana → Tempo). + obs-net: + driver: bridge + +volumes: + prometheus_data: + loki_data: + tempo_data: + grafana_data: + glitchtip_data: diff --git a/infra/observability/grafana/provisioning/dashboards/.gitkeep b/infra/observability/grafana/provisioning/dashboards/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/infra/observability/grafana/provisioning/datasources/.gitkeep b/infra/observability/grafana/provisioning/datasources/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/infra/observability/loki/.gitkeep b/infra/observability/loki/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/infra/observability/prometheus/.gitkeep b/infra/observability/prometheus/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/infra/observability/promtail/.gitkeep b/infra/observability/promtail/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/infra/observability/tempo/.gitkeep b/infra/observability/tempo/.gitkeep new file mode 100644 index 00000000..e69de29b From cf8d22d81b69e49d08f94f799e4273a1c8521def Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 15 May 2026 01:28:14 +0200 Subject: [PATCH 2/2] docs: update DEPLOYMENT.md and C4 diagram for observability scaffold MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the stale "no monitoring infrastructure in place yet" note in §4 with a brief description of the observability compose file and a pointer to issue #581 for full docs. Add a placeholder System_Boundary block for Prometheus + Loki + Grafana to l2-containers.puml, showing the stack joins archiv-net. Co-Authored-By: Claude Sonnet 4.6 --- docs/DEPLOYMENT.md | 4 ++-- docs/architecture/c4/l2-containers.puml | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index 58d2769e..1bfb49e1 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -256,9 +256,9 @@ docker compose logs --tail=200 - **Spring Actuator health**: `http://localhost:8080/actuator/health` (internal only in prod — port 8081 for Prometheus scraping) - **Prometheus scraping**: management port 8081, path `/actuator/prometheus`. Internal only; Caddy blocks `/actuator/*` externally. -### Future observability +### Observability stack -Phase 7 of the Production v1 milestone adds Prometheus + Loki + Grafana. No monitoring infrastructure is in place yet. +An observability stack (Prometheus + Loki + Grafana) is scaffolded in `docker-compose.observability.yml`, with its configuration under `infra/observability/`. The compose file does not define any services yet; once they are added, the stack joins the existing `archiv-net` Docker network so that Prometheus can scrape the backend's management port. Full wiring and runbook documentation is tracked in issue #581. --- diff --git a/docs/architecture/c4/l2-containers.puml b/docs/architecture/c4/l2-containers.puml index 367b7d93..f27eda69 100644 --- a/docs/architecture/c4/l2-containers.puml +++ b/docs/architecture/c4/l2-containers.puml @@ -17,6 +17,12 @@ System_Boundary(archiv, "Familienarchiv (Docker Compose)") { Container(mc, "Bucket / Service-Account Init", "MinIO Client (mc)", "One-shot container on startup. 
Idempotent: creates the archive bucket, the archiv-app service account, and attaches the readwrite policy.") } +System_Boundary(observability, "Observability Stack (docker-compose.observability.yml / archiv-net)") { + Container(prometheus, "Prometheus", "prom/prometheus", "Scrapes metrics from backend management port 8081 (/actuator/prometheus). Retention and alert rules TBD — see issue #581.") + Container(loki, "Loki", "grafana/loki", "Log aggregation. Receives structured logs from the stack. Wiring TBD — see issue #581.") + Container(grafana, "Grafana", "grafana/grafana", "Dashboards and alerting UI. Data sources: Prometheus + Loki. Wiring TBD — see issue #581.") +} + Rel(user, caddy, "HTTPS", "TLS 1.2/1.3") Rel(caddy, frontend, "Reverse proxies non-/api requests", "HTTP / loopback:3000") Rel(caddy, backend, "Reverse proxies /api/*", "HTTP / loopback:8080")