devops(observability): add Loki + Promtail for centralised container log aggregation
All checks were successful
CI / Unit & Component Tests (pull_request) Successful in 3m21s
CI / OCR Service Tests (pull_request) Successful in 16s
CI / Backend Unit Tests (pull_request) Successful in 4m31s
CI / fail2ban Regex (pull_request) Successful in 38s
CI / Compose Bucket Idempotency (pull_request) Successful in 57s

- Add obs-loki (grafana/loki:3.4.2) to docker-compose.observability.yml
  with healthcheck (wget /ready), expose-only port 3100, named volume loki_data
- Add obs-promtail (grafana/promtail:3.4.2) bridging archiv-net + obs-net,
  depends_on loki service_healthy, docker.sock:ro, promtail_positions volume
  for restart-safe position tracking
- Create infra/observability/loki/loki-config.yml: single-node TSDB schema v13,
  30-day retention, auth disabled (obs-net only), telemetry off
- Create infra/observability/promtail/promtail-config.yml: Docker SD scrape,
  container_name / compose_service / compose_project / logstream labels
- Update docs/DEPLOYMENT.md §4 with service table and Loki quick-check commands

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-15 02:18:22 +02:00
parent 6a118589c2
commit 22e1b25398
6 changed files with 138 additions and 4 deletions

View File

@@ -0,0 +1,32 @@
auth_enabled: false # safe — loki is not exposed beyond obs-net. Add auth before binding port 3100 to host.
server:
http_listen_port: 3100
common:
instance_addr: 127.0.0.1
path_prefix: /loki
storage:
filesystem:
chunks_directory: /loki/chunks
rules_directory: /loki/rules
replication_factor: 1
ring:
kvstore:
store: inmemory # correct for single-node — no etcd/consul needed here
schema_config:
configs:
- from: 2024-01-01
store: tsdb
object_store: filesystem
schema: v13
index:
prefix: index_
period: 24h
limits_config:
retention_period: 720h # 30 days — low-volume family archive; revisit if log volume grows
analytics:
reporting_enabled: false # no telemetry sent to Grafana Labs

View File

@@ -0,0 +1,30 @@
server:
http_listen_port: 9080
grpc_listen_port: 0 # gRPC disabled — used for Promtail clustering only; single-node deployment
positions:
filename: /tmp/positions.yaml # /tmp is a named volume (promtail_positions) — persists across restarts
clients:
- url: http://loki:3100/loki/api/v1/push
# Loki HTTP API is unauthenticated internally. Any container on obs-net can push logs.
# Acceptable: only trusted application containers join this network.
scrape_configs:
- job_name: docker-containers
docker_sd_configs:
- host: unix:///var/run/docker.sock
refresh_interval: 5s
relabel_configs:
- source_labels: ['__meta_docker_container_name']
regex: '/(.*)'
target_label: 'container_name'
# Note: container_name differs between dev (archive-backend) and prod
# (archiv-production-backend-1). Prefer compose_service for stable LogQL
# queries across environments — it is stable: backend, db, minio, etc.
- source_labels: ['__meta_docker_container_label_com_docker_compose_service']
target_label: 'compose_service'
- source_labels: ['__meta_docker_container_label_com_docker_compose_project']
target_label: 'compose_project'
- source_labels: ['__meta_docker_container_log_stream']
target_label: 'logstream'