feat(observability): add GlitchTip error tracking infrastructure
Some checks failed
Some checks failed
This commit was merged in pull request #590.
This commit is contained in:
@@ -166,7 +166,75 @@ services:
|
||||
- obs-net
|
||||
|
||||
# --- Error Tracking: GlitchTip ---
|
||||
# glitchtip: (see future issue)
|
||||
|
||||
obs-redis:
|
||||
image: redis:7-alpine
|
||||
container_name: obs-redis
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- glitchtip_data:/data
|
||||
expose:
|
||||
- "6379"
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
networks:
|
||||
- obs-net
|
||||
|
||||
obs-glitchtip:
|
||||
image: glitchtip/glitchtip:v4
|
||||
container_name: obs-glitchtip
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
obs-redis:
|
||||
condition: service_healthy
|
||||
obs-glitchtip-db-init:
|
||||
condition: service_completed_successfully
|
||||
environment:
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@archive-db:5432/glitchtip
|
||||
REDIS_URL: redis://obs-redis:6379/0
|
||||
SECRET_KEY: ${GLITCHTIP_SECRET_KEY}
|
||||
GLITCHTIP_DOMAIN: ${GLITCHTIP_DOMAIN:-http://localhost:3002}
|
||||
DEFAULT_FROM_EMAIL: ${APP_MAIL_FROM:-noreply@familienarchiv.local}
|
||||
EMAIL_URL: smtp://mailpit:1025
|
||||
GLITCHTIP_MAX_EVENT_LIFE_DAYS: 90
|
||||
ports:
|
||||
- "127.0.0.1:${PORT_GLITCHTIP:-3002}:8080"
|
||||
networks:
|
||||
- archiv-net
|
||||
- obs-net
|
||||
|
||||
obs-glitchtip-worker:
|
||||
image: glitchtip/glitchtip:v4
|
||||
container_name: obs-glitchtip-worker
|
||||
restart: unless-stopped
|
||||
command: ./bin/run-celery-with-beat.sh
|
||||
depends_on:
|
||||
obs-redis:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@archive-db:5432/glitchtip
|
||||
REDIS_URL: redis://obs-redis:6379/0
|
||||
SECRET_KEY: ${GLITCHTIP_SECRET_KEY}
|
||||
networks:
|
||||
- archiv-net
|
||||
- obs-net
|
||||
|
||||
obs-glitchtip-db-init:
|
||||
image: postgres:16-alpine
|
||||
container_name: obs-glitchtip-db-init
|
||||
restart: "no"
|
||||
environment:
|
||||
PGPASSWORD: ${POSTGRES_PASSWORD}
|
||||
command: >
|
||||
sh -c "psql -h archive-db -U ${POSTGRES_USER} -tc
|
||||
\"SELECT 1 FROM pg_database WHERE datname = 'glitchtip'\" |
|
||||
grep -q 1 ||
|
||||
psql -h archive-db -U ${POSTGRES_USER} -c \"CREATE DATABASE glitchtip;\""
|
||||
networks:
|
||||
- archiv-net
|
||||
|
||||
networks:
|
||||
# Shared network created by the main docker-compose.yml.
|
||||
|
||||
@@ -144,6 +144,9 @@ All vars are set in `.env` at the repo root (copy from `.env.example`). The back
|
||||
| `PORT_PROMETHEUS` | Host port for the Prometheus UI (bound to `127.0.0.1` only) | `9090` | — | — |
|
||||
| `PORT_GRAFANA` | Host port for the Grafana UI (bound to `127.0.0.1` only) | `3001` | — | — |
|
||||
| `GRAFANA_ADMIN_PASSWORD` | Grafana `admin` user password | `changeme` | YES (prod) | YES |
|
||||
| `PORT_GLITCHTIP` | Host port for the GlitchTip UI (bound to `127.0.0.1` only) | `3002` | — | — |
|
||||
| `GLITCHTIP_DOMAIN` | Public-facing base URL for GlitchTip (used in email links and CORS) | `http://localhost:3002` | YES (prod) | — |
|
||||
| `GLITCHTIP_SECRET_KEY` | Django secret key for GlitchTip — generate with `python3 -c "import secrets; print(secrets.token_hex(32))"` | — | YES | YES |
|
||||
|
||||
---
|
||||
|
||||
@@ -287,6 +290,10 @@ Current services:
|
||||
| `obs-promtail` | `grafana/promtail:3.4.2` | Log shipping agent — reads all Docker container logs via the Docker socket and forwards them to Loki with `container_name`, `compose_service`, and `compose_project` labels |
|
||||
| `obs-tempo` | `grafana/tempo:2.7.2` | Distributed trace storage — OTLP gRPC receiver on port 4317, OTLP HTTP on port 4318 (both `archiv-net`-internal). Grafana queries traces on port 3200 (`obs-net`-internal). All ports are `expose`-only (not host-bound). |
|
||||
| `obs-grafana` | `grafana/grafana-oss:11.6.1` | Unified observability UI — metrics dashboards, log exploration, trace viewer. Bound to `127.0.0.1:${PORT_GRAFANA:-3001}` on the host. |
|
||||
| `obs-glitchtip` | `glitchtip/glitchtip:v4` | Sentry-compatible error tracker. Receives frontend + backend error events, groups by fingerprint, provides issue UI with stack traces. Bound to `127.0.0.1:${PORT_GLITCHTIP:-3002}`. |
|
||||
| `obs-glitchtip-worker` | `glitchtip/glitchtip:v4` | Celery + beat worker — processes async GlitchTip tasks (event ingestion, notifications, cleanup). |
|
||||
| `obs-redis` | `redis:7-alpine` | Celery task broker for GlitchTip. Internal to `obs-net`; no host port exposed. |
|
||||
| `obs-glitchtip-db-init` | `postgres:16-alpine` | One-shot init container. Creates the `glitchtip` database on the existing `archive-db` PostgreSQL instance if it does not already exist. Runs at stack startup; exits cleanly once done. |
|
||||
|
||||
#### Grafana
|
||||
|
||||
@@ -324,6 +331,39 @@ docker exec obs-loki wget -qO- \
|
||||
|
||||
Prometheus port `9090`, Grafana port `3001`, and GlitchTip port `3002` are bound to `127.0.0.1` on the host. No other observability ports are host-bound.
|
||||
|
||||
#### GlitchTip
|
||||
|
||||
| Item | Value |
|
||||
|---|---|
|
||||
| URL | `http://localhost:3002` (or `http://localhost:$PORT_GLITCHTIP`) |
|
||||
|
||||
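**Required env vars** — set in `.env` before first start. The snippet below is shell syntax: `.env` files do not evaluate `$(...)`, so run the `python3` command in a terminal and paste the resulting literal value into `.env`: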
|
||||
|
||||
```bash
|
||||
GLITCHTIP_SECRET_KEY=$(python3 -c "import secrets; print(secrets.token_hex(32))")
|
||||
GLITCHTIP_DOMAIN=http://localhost:3002 # change to your public URL in prod
|
||||
PORT_GLITCHTIP=3002 # optional, defaults to 3002
|
||||
```
|
||||
|
||||
**Database:** GlitchTip shares the existing `archive-db` PostgreSQL instance. The `obs-glitchtip-db-init` one-shot container creates a dedicated `glitchtip` database on first stack start — no manual step required.
|
||||
|
||||
**First-run steps** (one-time, after `docker compose -f docker-compose.observability.yml up -d`):
|
||||
|
||||
```bash
|
||||
# 1. Create the Django superuser (interactive)
|
||||
docker exec -it obs-glitchtip ./manage.py createsuperuser
|
||||
|
||||
# 2. Open the GlitchTip UI and log in
|
||||
open http://localhost:3002   # macOS; on Linux use xdg-open, or visit the URL in a browser
|
||||
|
||||
# 3. Create an organisation (e.g. "Familienarchiv")
|
||||
# 4. Create two projects:
|
||||
# - "familienarchiv-frontend" (platform: JavaScript / SvelteKit)
|
||||
# - "familienarchiv-backend" (platform: Java / Spring Boot)
|
||||
# 5. Copy each project's DSN from Settings → Projects → <project> → Client Keys
|
||||
# 6. Wire the DSNs into the backend and frontend via env vars (separate issue)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. Backup + recovery
|
||||
|
||||
@@ -25,6 +25,9 @@ System_Boundary(observability, "Observability Stack (docker-compose.observabilit
|
||||
Container(promtail, "Promtail", "grafana/promtail:3.4.2", "Ships Docker container logs to Loki via Docker SD.")
|
||||
Container(tempo, "Tempo", "grafana/tempo:2.7.2", "Distributed trace storage. OTLP gRPC receiver on port 4317 (archiv-net). Grafana queries traces on port 3200 (obs-net). All ports internal only.")
|
||||
Container(grafana, "Grafana", "grafana/grafana-oss:11.6.1", "Unified observability UI — dashboards, logs, traces. Datasources (Prometheus, Loki, Tempo) and three dashboards are auto-provisioned.")
|
||||
Container(glitchtip, "GlitchTip", "glitchtip/glitchtip:v4", "Sentry-compatible error tracker — web process. Receives frontend + backend error events, groups by fingerprint, provides issue UI with stack traces.")
|
||||
Container(obs_glitchtip_worker, "GlitchTip Worker", "glitchtip/glitchtip:v4", "Celery + beat worker — async event ingestion, notifications, cleanup.")
|
||||
Container(obs_redis, "Redis", "redis:7-alpine", "Celery task queue for GlitchTip async workers.")
|
||||
}
|
||||
|
||||
Rel(user, caddy, "HTTPS", "TLS 1.2/1.3")
|
||||
@@ -43,5 +46,7 @@ Rel(backend, tempo, "Sends distributed traces via OTLP", "gRPC / OTLP / port 431
|
||||
Rel(grafana, prometheus, "Queries metrics", "HTTP 9090")
|
||||
Rel(grafana, loki, "Queries logs", "HTTP 3100")
|
||||
Rel(grafana, tempo, "Queries traces", "HTTP 3200")
|
||||
Rel(glitchtip, db, "Stores error events in glitchtip DB", "PostgreSQL / archiv-net")
|
||||
Rel(obs_glitchtip_worker, obs_redis, "Processes Celery tasks", "Redis / obs-net")
|
||||
|
||||
@enduml
|
||||
|
||||
Reference in New Issue
Block a user