devops(backend): expose Prometheus metrics endpoint + OTLP trace export from Spring Boot #588

Merged
marcel merged 3 commits from feat/issue-576-backend-instrumentation into main 2026-05-15 03:57:15 +02:00
4 changed files with 61 additions and 0 deletions
Showing only changes of commit b3e49a9504 - Show all commits

View File

@@ -197,6 +197,25 @@
<artifactId>jsoup</artifactId>
<version>1.18.1</version>
</dependency>
<!-- Observability: Micrometer registry backing the Prometheus metrics scrape endpoint.
     No <version> element on purpose: the version is managed by the Spring Boot BOM. -->
<dependency>
<groupId>io.micrometer</groupId>
<artifactId>micrometer-registry-prometheus</artifactId>
</dependency>
<!-- Observability: bridge from the Micrometer tracing API to OpenTelemetry.
     No <version> element on purpose: the version is managed by the Spring Boot BOM. -->
<dependency>
<groupId>io.micrometer</groupId>
<artifactId>micrometer-tracing-bridge-otel</artifactId>
</dependency>
<!-- Observability: OTel Spring Boot auto-instrumentation. NOT in the Spring Boot BOM, so the
     version is pinned explicitly here and has to be bumped by hand.
     NOTE(review): micrometer-tracing-bridge-otel is declared in this file as well. Both can
     set up OpenTelemetry tracing; confirm that only one SDK / exporter pipeline ends up active
     and that spans are not exported twice. -->
<dependency>
<groupId>io.opentelemetry.instrumentation</groupId>
<artifactId>opentelemetry-spring-boot-starter</artifactId>
<version>2.27.0</version>
</dependency>
</dependencies>

View File

@@ -45,9 +45,34 @@ server:
forward-headers-strategy: native
management:
  server:
    # Actuator is served on its own port, separate from the application port (8080):
    #   (a) Caddy only routes to :8080, so /actuator/* is never proxied to the outside.
    #   (b) Prometheus scrapes backend:8081 directly inside archiv-net, not via Caddy.
    # NOTE(review): a separate management port does not by itself take actuator requests
    # out of Spring Security. Spring Boot applies the application's security filter chain
    # to the management server as well, so confirm the security configuration explicitly
    # permits the endpoints that are scraped or probed unauthenticated
    # (e.g. /actuator/prometheus, /actuator/health); otherwise they answer 401/302.
    port: 8081
  endpoints:
    web:
      exposure:
        # Only these actuator endpoints are reachable over HTTP on the management port.
        include: health,info,prometheus,metrics
  endpoint:
    prometheus:
      enabled: true
  health:
    mail:
      # Mail health indicator is switched off.
      enabled: false
  tracing:
    sampling:
      # Default when nothing overrides it (e.g. started from the IDE): sample every request.
      # The docker-compose backend service lowers this to 0.1 through the
      # MANAGEMENT_TRACING_SAMPLING_PROBABILITY environment variable.
      probability: 1.0
# OpenTelemetry trace export. Export failures are non-fatal: the app starts cleanly
# even when no collector (Tempo) is running.
# The http://localhost:4317 fallback keeps CI working when OTEL_EXPORTER_OTLP_ENDPOINT is unset.
otel:
  service:
    name: familienarchiv-backend
  exporter:
    otlp:
      # Port 4317 is the OTLP gRPC port, while the OpenTelemetry Spring Boot starter
      # defaults to http/protobuf (conventionally port 4318). Pin gRPC so that the
      # protocol matches the port used here and in docker-compose (http://tempo:4317).
      protocol: grpc
      endpoint: ${OTEL_EXPORTER_OTLP_ENDPOINT:http://localhost:4317}
springdoc:
api-docs:

View File

@@ -13,3 +13,10 @@ spring:
password: test
mail:
host: localhost
# Keep tests free of OTLP traffic when no Tempo/collector is running.
# Sampling probability 0.0 stops the Micrometer tracing sampler from recording spans.
management:
  tracing:
    sampling:
      probability: 0.0
# The sampling setting above does not switch off the OpenTelemetry Spring Boot starter,
# which configures its own OTLP exporters. Disable its SDK in tests so that no export
# of any signal is attempted.
otel:
  sdk:
    disabled: true

View File

@@ -147,8 +147,18 @@ services:
SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-false}
APP_OCR_BASE_URL: http://ocr-service:8000
APP_OCR_TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"
# Observability: send traces to Tempo inside archiv-net (OTLP gRPC port 4317)
# Tempo is defined in docker-compose.observability.yml (future issue).
# OTLP failures are non-fatal — backend starts cleanly without the observability stack.
OTEL_EXPORTER_OTLP_ENDPOINT: http://tempo:4317
# 10% sampling for everything started from this compose file (dev + staging).
# This overrides the 1.0 default in application.yaml; set it to "1.0" locally if every trace is needed.
MANAGEMENT_TRACING_SAMPLING_PROBABILITY: "0.1"
ports:
- "${PORT_BACKEND}:8080"
# Management port — Prometheus scrapes /actuator/prometheus from inside archiv-net.
# Not exposed to the host; Docker service-name DNS (backend:8081) is sufficient.
expose:
- "8081"
networks:
- archiv-net
healthcheck: