diff --git a/.env.example b/.env.example index 5b928337..3d8ebb00 100644 --- a/.env.example +++ b/.env.example @@ -26,6 +26,30 @@ PORT_MAILPIT_SMTP=1025 # Generate with: python3 -c "import secrets; print(secrets.token_hex(32))" OCR_TRAINING_TOKEN=change-me-in-production +# --- Observability --- +# Optional stack — start with: docker compose -f docker-compose.observability.yml up -d +# Requires the main stack to already be running (docker compose up -d creates archiv-net). + +# Ports for host access +PORT_GRAFANA=3001 +PORT_GLITCHTIP=3002 +PORT_PROMETHEUS=9090 + +# GlitchTip domain — production: use https://grafana.raddatz.cloud (must match Caddy vhost) +GLITCHTIP_DOMAIN=http://localhost:3002 + +# GlitchTip secret key — Django SECRET_KEY equivalent, used to sign sessions and tokens. +# REQUIRED in production — must not be empty, 'changeme', or the placeholder value below. Fail-closed: GlitchTip will +# refuse to start with an invalid key. +# Generate with: python3 -c "import secrets; print(secrets.token_hex(50))" +GLITCHTIP_SECRET_KEY=changeme-generate-a-real-secret + +# Error reporting DSNs — leave empty to disable the SDK (safe default). 
+# SENTRY_DSN: backend (Spring Boot) — used by the GlitchTip/Sentry Java SDK +SENTRY_DSN= +# VITE_SENTRY_DSN: frontend (SvelteKit) — injected at build time via Vite +VITE_SENTRY_DSN= + # Production SMTP — uncomment and fill in to send real emails instead of catching them # APP_BASE_URL=https://your-domain.example.com # MAIL_HOST=smtp.example.com diff --git a/docker-compose.observability.yml b/docker-compose.observability.yml new file mode 100644 index 00000000..461f2905 --- /dev/null +++ b/docker-compose.observability.yml @@ -0,0 +1,48 @@ +# Observability stack — Grafana LGTM + GlitchTip +# +# Requires the main stack to be running first: +# docker compose up -d # creates archiv-net +# docker compose -f docker-compose.observability.yml up -d +# +# To validate without starting: +# docker compose -f docker-compose.observability.yml config + +# No services defined yet — added in subsequent issues: +# +# --- Metrics: Prometheus --- +# prometheus: (see issue #573) +# +# --- Logs: Loki + Promtail --- +# loki: (see issue #574) +# promtail: (see issue #575) +# +# --- Traces: Tempo --- +# tempo: (see future issue) +# +# --- Dashboards: Grafana --- +# grafana: (see future issue) +# +# --- Error Tracking: GlitchTip --- +# glitchtip: (see future issue) +services: {} + +networks: + # Shared network created by the main docker-compose.yml. + # The observability stack joins as a peer so Prometheus can scrape + # archive-backend by container name. The observability stack must NOT + # attempt to create this network — it will fail with a clear error if + # the main stack is not running yet. + archiv-net: + external: true + + # Internal network for observability-service-to-service traffic + # (e.g. Grafana → Prometheus, Grafana → Loki, Grafana → Tempo). 
+ obs-net: + driver: bridge + +volumes: + prometheus_data: + loki_data: + tempo_data: + grafana_data: + glitchtip_data: diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index 58d2769e..1bfb49e1 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -256,9 +256,9 @@ docker compose logs --tail=200 - **Spring Actuator health**: `http://localhost:8080/actuator/health` (internal only in prod — port 8081 for Prometheus scraping) - **Prometheus scraping**: management port 8081, path `/actuator/prometheus`. Internal only; Caddy blocks `/actuator/*` externally. -### Future observability +### Observability stack -Phase 7 of the Production v1 milestone adds Prometheus + Loki + Grafana. No monitoring infrastructure is in place yet. +The scaffolding for an observability stack (Prometheus + Loki + Grafana) is in place: `docker-compose.observability.yml` exists but defines no services yet, and configuration will live under `infra/observability/`. Once services are added, the stack joins the existing `archiv-net` Docker network to scrape the backend's management port. Full wiring and runbook documentation is tracked in issue #581. --- diff --git a/docs/architecture/c4/l2-containers.puml b/docs/architecture/c4/l2-containers.puml index 367b7d93..f27eda69 100644 --- a/docs/architecture/c4/l2-containers.puml +++ b/docs/architecture/c4/l2-containers.puml @@ -17,6 +17,12 @@ System_Boundary(archiv, "Familienarchiv (Docker Compose)") { Container(mc, "Bucket / Service-Account Init", "MinIO Client (mc)", "One-shot container on startup. Idempotent: creates the archive bucket, the archiv-app service account, and attaches the readwrite policy.") } +System_Boundary(observability, "Observability Stack (docker-compose.observability.yml / archiv-net)") { + Container(prometheus, "Prometheus", "prom/prometheus", "Scrapes metrics from backend management port 8081 (/actuator/prometheus). Retention and alert rules TBD — see issue #581.") + Container(loki, "Loki", "grafana/loki", "Log aggregation. Receives structured logs from the stack. 
Wiring TBD — see issue #581.") + Container(grafana, "Grafana", "grafana/grafana", "Dashboards and alerting UI. Data sources: Prometheus + Loki. Wiring TBD — see issue #581.") +} + Rel(user, caddy, "HTTPS", "TLS 1.2/1.3") Rel(caddy, frontend, "Reverse proxies non-/api requests", "HTTP / loopback:3000") Rel(caddy, backend, "Reverse proxies /api/*", "HTTP / loopback:8080") diff --git a/infra/observability/grafana/provisioning/dashboards/.gitkeep b/infra/observability/grafana/provisioning/dashboards/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/infra/observability/grafana/provisioning/datasources/.gitkeep b/infra/observability/grafana/provisioning/datasources/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/infra/observability/loki/.gitkeep b/infra/observability/loki/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/infra/observability/prometheus/.gitkeep b/infra/observability/prometheus/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/infra/observability/promtail/.gitkeep b/infra/observability/promtail/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/infra/observability/tempo/.gitkeep b/infra/observability/tempo/.gitkeep new file mode 100644 index 00000000..e69de29b