devops(backend): expose Prometheus metrics endpoint + OTLP trace export from Spring Boot
Some checks failed
CI / Unit & Component Tests (pull_request) Successful in 3m20s
CI / OCR Service Tests (pull_request) Successful in 16s
CI / Backend Unit Tests (pull_request) Failing after 2m35s
CI / fail2ban Regex (pull_request) Successful in 37s
CI / Compose Bucket Idempotency (pull_request) Successful in 59s
Some checks failed
CI / Unit & Component Tests (pull_request) Successful in 3m20s
CI / OCR Service Tests (pull_request) Successful in 16s
CI / Backend Unit Tests (pull_request) Failing after 2m35s
CI / fail2ban Regex (pull_request) Successful in 37s
CI / Compose Bucket Idempotency (pull_request) Successful in 59s
- Add micrometer-registry-prometheus (BOM-managed) to expose /actuator/prometheus
- Add micrometer-tracing-bridge-otel (BOM-managed) for Micrometer → OTel tracing bridge
- Add opentelemetry-spring-boot-starter 2.27.0 (pinned — not in Spring Boot BOM)
- Move management to port 8081 so Prometheus scrapes directly inside archiv-net, bypassing both Caddy and Spring Security's session-authenticated filter chain
- Configure otel.service.name and OTLP endpoint (default localhost:4317 for CI safety)
- Set tracing sampling probability to 1.0 in base config; override via env var in compose
- Add OTEL_EXPORTER_OTLP_ENDPOINT + MANAGEMENT_TRACING_SAMPLING_PROBABILITY to docker-compose.yml
- Expose management port 8081 inside archiv-net for Prometheus scraping
- Disable trace export in application-test.yaml (probability: 0.0) for deterministic CI

OTLP export failures are non-fatal; app starts cleanly without Tempo running.

Closes #576

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -197,6 +197,25 @@
|
||||
<artifactId>jsoup</artifactId>
|
||||
<version>1.18.1</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Observability: Prometheus metrics scrape endpoint (version managed by Spring Boot BOM) -->
|
||||
<dependency>
|
||||
<groupId>io.micrometer</groupId>
|
||||
<artifactId>micrometer-registry-prometheus</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Observability: Micrometer → OpenTelemetry tracing bridge (version managed by Spring Boot BOM) -->
|
||||
<dependency>
|
||||
<groupId>io.micrometer</groupId>
|
||||
<artifactId>micrometer-tracing-bridge-otel</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Observability: OTel Spring Boot auto-instrumentation — NOT in Spring Boot BOM, pinned explicitly -->
|
||||
<dependency>
|
||||
<groupId>io.opentelemetry.instrumentation</groupId>
|
||||
<artifactId>opentelemetry-spring-boot-starter</artifactId>
|
||||
<version>2.27.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
|
||||
|
||||
@@ -45,9 +45,34 @@ server:
|
||||
forward-headers-strategy: native
|
||||
|
||||
management:
|
||||
server:
|
||||
# Management port is separate from the app port so that:
|
||||
# (a) Caddy never proxies /actuator/* (it only routes :8080 → the app port)
|
||||
# (b) Prometheus scrapes backend:8081 directly inside archiv-net, not via Caddy
|
||||
# (c) Spring Security's session-authenticated filter chain on :8080 never sees actuator requests
|
||||
port: 8081
|
||||
endpoints:
|
||||
web:
|
||||
exposure:
|
||||
include: health,info,prometheus,metrics
|
||||
endpoint:
|
||||
prometheus:
|
||||
enabled: true
|
||||
health:
|
||||
mail:
|
||||
enabled: false
|
||||
tracing:
|
||||
sampling:
|
||||
probability: 1.0 # 100% by default (e.g. local runs outside compose); docker-compose.yml overrides this via MANAGEMENT_TRACING_SAMPLING_PROBABILITY
|
||||
|
||||
# OpenTelemetry trace export — failures are non-fatal (app starts cleanly without Tempo running)
|
||||
# The default http://localhost:4317 ensures CI compatibility when no observability stack is present.
|
||||
otel:
|
||||
service:
|
||||
name: familienarchiv-backend
|
||||
exporter:
|
||||
otlp:
|
||||
endpoint: ${OTEL_EXPORTER_OTLP_ENDPOINT:http://localhost:4317}
|
||||
|
||||
springdoc:
|
||||
api-docs:
|
||||
|
||||
@@ -13,3 +13,10 @@ spring:
|
||||
password: test
|
||||
mail:
|
||||
host: localhost
|
||||
|
||||
# Disable trace export in tests — prevents OTLP connection attempts when no Tempo is running.
|
||||
# Sampling probability 0.0 means no spans are sampled, so no export is attempted.
|
||||
management:
|
||||
tracing:
|
||||
sampling:
|
||||
probability: 0.0
|
||||
|
||||
@@ -147,8 +147,18 @@ services:
|
||||
SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-false}
|
||||
APP_OCR_BASE_URL: http://ocr-service:8000
|
||||
APP_OCR_TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"
|
||||
# Observability: send traces to Tempo inside archiv-net (OTLP gRPC port 4317)
|
||||
# Tempo is defined in docker-compose.observability.yml (future issue).
|
||||
# OTLP failures are non-fatal — backend starts cleanly without the observability stack.
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: http://tempo:4317
|
||||
# 10% sampling in this compose (dev + staging) — override locally to 1.0 if needed
|
||||
MANAGEMENT_TRACING_SAMPLING_PROBABILITY: "0.1"
|
||||
ports:
|
||||
- "${PORT_BACKEND}:8080"
|
||||
# Management port — Prometheus scrapes /actuator/prometheus from inside archiv-net.
|
||||
# Not exposed to the host; Docker service-name DNS (backend:8081) is sufficient.
|
||||
expose:
|
||||
- "8081"
|
||||
networks:
|
||||
- archiv-net
|
||||
healthcheck:
|
||||
|
||||
Reference in New Issue
Block a user