Merge pull request 'devops(observability): add Tempo for distributed trace storage (OTLP receiver)' (#587) from feat/issue-575-tempo into main
All checks were successful
All checks were successful
devops(observability): add Tempo for distributed trace storage (#587)
This commit was merged in pull request #587.
This commit is contained in:
@@ -112,8 +112,29 @@ services:
|
||||
condition: service_healthy
|
||||
|
||||
# --- Traces: Tempo ---
|
||||
# tempo: (see future issue)
|
||||
#
|
||||
|
||||
tempo:
|
||||
image: grafana/tempo:2.7.2
|
||||
container_name: obs-tempo
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- ./infra/observability/tempo/tempo.yml:/etc/tempo.yml:ro
|
||||
- tempo_data:/var/tempo
|
||||
command: -config.file=/etc/tempo.yml
|
||||
expose:
|
||||
- "3200" # Grafana queries Tempo on this port (obs-net only)
|
||||
- "4317" # OTLP gRPC — backend sends traces here (archiv-net)
|
||||
- "4318" # OTLP HTTP — alternative transport (archiv-net)
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget -qO- http://localhost:3200/ready | grep -q ready || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 15s
|
||||
networks:
|
||||
- archiv-net # backend (archive-backend) reaches tempo:4317 over this network
|
||||
- obs-net # Grafana reaches tempo:3200 over this network
|
||||
|
||||
# --- Dashboards: Grafana ---
|
||||
# grafana: (see future issue)
|
||||
#
|
||||
|
||||
@@ -281,6 +281,7 @@ Current services:
|
||||
| `obs-cadvisor` | `gcr.io/cadvisor/cadvisor:v0.52.1` | Per-container resource metrics |
|
||||
| `obs-loki` | `grafana/loki:3.4.2` | Log aggregation — receives log streams from Promtail. Port 3100 is `expose`-only (not host-bound). |
|
||||
| `obs-promtail` | `grafana/promtail:3.4.2` | Log shipping agent — reads all Docker container logs via the Docker socket and forwards them to Loki with `container_name`, `compose_service`, and `compose_project` labels |
|
||||
| `obs-tempo` | `grafana/tempo:2.7.2` | Distributed trace storage — OTLP gRPC receiver on port 4317, OTLP HTTP on port 4318 (both `archiv-net`-internal). Grafana queries traces on port 3200 (`obs-net`-internal). All ports are `expose`-only (not host-bound). |
|
||||
|
||||
**Loki quick checks** (after ~60 s, run from inside the `obs-loki` container):
|
||||
|
||||
|
||||
@@ -21,7 +21,8 @@ System_Boundary(observability, "Observability Stack (docker-compose.observabilit
|
||||
Container(prometheus, "Prometheus", "prom/prometheus", "Scrapes metrics from backend management port 8081 (/actuator/prometheus). Retention and alert rules TBD — see issue #581.")
|
||||
Container(loki, "Loki", "grafana/loki:3.4.2", "Stores log streams from all containers.")
|
||||
Container(promtail, "Promtail", "grafana/promtail:3.4.2", "Ships Docker container logs to Loki via Docker SD")
|
||||
Container(grafana, "Grafana", "grafana/grafana", "Dashboards and alerting UI. Data sources: Prometheus + Loki. Wiring TBD — see issue #581.")
|
||||
Container(tempo, "Tempo", "grafana/tempo:2.7.2", "Distributed trace storage. OTLP gRPC receiver on port 4317 (archiv-net). Grafana queries traces on port 3200 (obs-net). All ports internal only.")
|
||||
Container(grafana, "Grafana", "grafana/grafana", "Dashboards and alerting UI. Data sources: Prometheus + Loki + Tempo. Wiring TBD — see issue #581.")
|
||||
}
|
||||
|
||||
Rel(user, caddy, "HTTPS", "TLS 1.2/1.3")
|
||||
@@ -36,5 +37,6 @@ Rel(backend, mail, "Sends notification and password-reset emails (optional)", "S
|
||||
Rel(ocr, storage, "Fetches PDF via presigned URL", "HTTP / S3 presigned")
|
||||
Rel(mc, storage, "Bootstraps bucket + service account on startup", "MinIO Client CLI")
|
||||
Rel(promtail, loki, "Pushes log streams", "HTTP/Loki push API")
|
||||
Rel(backend, tempo, "Sends distributed traces via OTLP", "gRPC / OTLP / port 4317 (archiv-net)")
|
||||
|
||||
@enduml
|
||||
|
||||
51
infra/observability/tempo/tempo.yml
Normal file
51
infra/observability/tempo/tempo.yml
Normal file
@@ -0,0 +1,51 @@
|
||||
server:
|
||||
http_listen_port: 3200
|
||||
|
||||
distributor:
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
http:
|
||||
endpoint: 0.0.0.0:4318
|
||||
|
||||
ingester:
|
||||
max_block_duration: 5m
|
||||
|
||||
compactor:
|
||||
compaction:
|
||||
# 30 days — matches Loki retention. Compactor enforces this automatically;
|
||||
# no manual intervention needed under normal trace volumes.
|
||||
block_retention: 720h
|
||||
|
||||
storage:
|
||||
trace:
|
||||
# Local filesystem storage — single-VPS deployment, no S3 backend needed.
|
||||
# Both paths are on the same named Docker volume (tempo_data) so they
|
||||
# survive container restarts without split-brain between WAL and blocks.
|
||||
backend: local
|
||||
local:
|
||||
path: /var/tempo/blocks
|
||||
wal:
|
||||
path: /var/tempo/wal
|
||||
|
||||
metrics_generator:
|
||||
registry:
|
||||
external_labels:
|
||||
source: tempo
|
||||
storage:
|
||||
path: /var/tempo/generator/wal
|
||||
processors:
|
||||
- service-graphs
|
||||
- span-metrics
|
||||
|
||||
# Tempo HTTP API (port 3200) is unauthenticated. Access is controlled entirely
|
||||
# by network isolation: only Grafana (on obs-net) should reach this port.
|
||||
# The OTLP receivers (4317 gRPC, 4318 HTTP) are internal to archiv-net only.
|
||||
overrides:
|
||||
defaults:
|
||||
metrics_generator:
|
||||
processors:
|
||||
- service-graphs
|
||||
- span-metrics
|
||||
Reference in New Issue
Block a user