Merge pull request 'devops(backend): expose Prometheus metrics endpoint + OTLP trace export from Spring Boot' (#588) from feat/issue-576-backend-instrumentation into main
Some checks failed
CI / Unit & Component Tests (push) Successful in 3m19s
CI / OCR Service Tests (push) Successful in 17s
CI / Backend Unit Tests (push) Successful in 4m43s
CI / fail2ban Regex (push) Successful in 39s
CI / Compose Bucket Idempotency (push) Successful in 57s
nightly / deploy-staging (push) Failing after 2m6s
Some checks failed
CI / Unit & Component Tests (push) Successful in 3m19s
CI / OCR Service Tests (push) Successful in 17s
CI / Backend Unit Tests (push) Successful in 4m43s
CI / fail2ban Regex (push) Successful in 39s
CI / Compose Bucket Idempotency (push) Successful in 57s
nightly / deploy-staging (push) Failing after 2m6s
devops(backend): expose Prometheus metrics endpoint + OTLP trace export from Spring Boot (#588)
This commit was merged in pull request #588.
This commit is contained in:
@@ -197,6 +197,33 @@
|
|||||||
<artifactId>jsoup</artifactId>
|
<artifactId>jsoup</artifactId>
|
||||||
<version>1.18.1</version>
|
<version>1.18.1</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Observability: Prometheus metrics scrape endpoint (version managed by Spring Boot BOM) -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.micrometer</groupId>
|
||||||
|
<artifactId>micrometer-registry-prometheus</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Observability: Micrometer → OpenTelemetry tracing bridge (version managed by Spring Boot BOM) -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.micrometer</groupId>
|
||||||
|
<artifactId>micrometer-tracing-bridge-otel</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Observability: OTel Spring Boot auto-instrumentation — NOT in Spring Boot BOM, pinned explicitly -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.opentelemetry.instrumentation</groupId>
|
||||||
|
<artifactId>opentelemetry-spring-boot-starter</artifactId>
|
||||||
|
<version>2.27.0</version>
|
||||||
|
<exclusions>
|
||||||
|
<!-- Excludes AzureAppServiceResourceProvider which references ServiceAttributes.SERVICE_INSTANCE_ID
|
||||||
|
that does not exist in the semconv version pulled by this project. -->
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.opentelemetry.contrib</groupId>
|
||||||
|
<artifactId>opentelemetry-azure-resources</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -45,9 +45,34 @@ server:
|
|||||||
forward-headers-strategy: native
|
forward-headers-strategy: native
|
||||||
|
|
||||||
management:
|
management:
|
||||||
|
server:
|
||||||
|
# Management port is separate from the app port so that:
|
||||||
|
# (a) Caddy never proxies /actuator/* (it only routes :8080 → the app port)
|
||||||
|
# (b) Prometheus scrapes backend:8081 directly inside archiv-net, not via Caddy
|
||||||
|
# (c) Spring Security's session-authenticated filter chain on :8080 never sees actuator requests
|
||||||
|
port: 8081
|
||||||
|
endpoints:
|
||||||
|
web:
|
||||||
|
exposure:
|
||||||
|
include: health,info,prometheus,metrics
|
||||||
|
endpoint:
|
||||||
|
prometheus:
|
||||||
|
enabled: true
|
||||||
health:
|
health:
|
||||||
mail:
|
mail:
|
||||||
enabled: false
|
enabled: false
|
||||||
|
tracing:
|
||||||
|
sampling:
|
||||||
|
probability: 1.0  # 100% by default (local dev); override via MANAGEMENT_TRACING_SAMPLING_PROBABILITY (docker-compose sets 0.1)
|
||||||
|
|
||||||
|
# OpenTelemetry trace export — failures are non-fatal (app starts cleanly without Tempo running)
|
||||||
|
# The fallback http://localhost:4317 is used when OTEL_EXPORTER_OTLP_ENDPOINT is unset (e.g. in CI, where no observability stack is present).
|
||||||
|
otel:
|
||||||
|
service:
|
||||||
|
name: familienarchiv-backend
|
||||||
|
exporter:
|
||||||
|
otlp:
|
||||||
|
endpoint: ${OTEL_EXPORTER_OTLP_ENDPOINT:http://localhost:4317}
|
||||||
|
|
||||||
springdoc:
|
springdoc:
|
||||||
api-docs:
|
api-docs:
|
||||||
|
|||||||
@@ -13,3 +13,16 @@ spring:
|
|||||||
password: test
|
password: test
|
||||||
mail:
|
mail:
|
||||||
host: localhost
|
host: localhost
|
||||||
|
|
||||||
|
# Disable OTel SDK entirely in tests — prevents auto-configuration from loading resource providers
|
||||||
|
# (e.g. AzureAppServiceResourceProvider) that fail against the semconv version used here.
|
||||||
|
otel:
|
||||||
|
sdk:
|
||||||
|
disabled: true
|
||||||
|
|
||||||
|
# Disable trace export in tests — prevents OTLP connection attempts when no Tempo is running.
|
||||||
|
# Sampling probability 0.0 means no spans are sampled, so no export is attempted.
|
||||||
|
management:
|
||||||
|
tracing:
|
||||||
|
sampling:
|
||||||
|
probability: 0.0
|
||||||
|
|||||||
@@ -147,8 +147,18 @@ services:
|
|||||||
SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-false}
|
SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-false}
|
||||||
APP_OCR_BASE_URL: http://ocr-service:8000
|
APP_OCR_BASE_URL: http://ocr-service:8000
|
||||||
APP_OCR_TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"
|
APP_OCR_TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"
|
||||||
|
# Observability: send traces to Tempo inside archiv-net (OTLP gRPC port 4317)
|
||||||
|
# Tempo is defined in docker-compose.observability.yml (future issue).
|
||||||
|
# OTLP failures are non-fatal — backend starts cleanly without the observability stack.
|
||||||
|
OTEL_EXPORTER_OTLP_ENDPOINT: http://tempo:4317
|
||||||
|
# 10% sampling in this compose (dev + staging) — override locally to 1.0 if needed
|
||||||
|
MANAGEMENT_TRACING_SAMPLING_PROBABILITY: "0.1"
|
||||||
ports:
|
ports:
|
||||||
- "${PORT_BACKEND}:8080"
|
- "${PORT_BACKEND}:8080"
|
||||||
|
# Management port — Prometheus scrapes /actuator/prometheus from inside archiv-net.
|
||||||
|
# Not exposed to the host; Docker service-name DNS (backend:8081) is sufficient.
|
||||||
|
expose:
|
||||||
|
- "8081"
|
||||||
networks:
|
networks:
|
||||||
- archiv-net
|
- archiv-net
|
||||||
healthcheck:
|
healthcheck:
|
||||||
|
|||||||
@@ -107,6 +107,8 @@ All vars are set in `.env` at the repo root (copy from `.env.example`). The back
|
|||||||
| `MAIL_SMTP_AUTH` | SMTP auth enabled | `false` (dev) | YES (prod) | — |
|
| `MAIL_SMTP_AUTH` | SMTP auth enabled | `false` (dev) | YES (prod) | — |
|
||||||
| `MAIL_STARTTLS_ENABLE` | STARTTLS enabled | `false` (dev) | YES (prod) | — |
|
| `MAIL_STARTTLS_ENABLE` | STARTTLS enabled | `false` (dev) | YES (prod) | — |
|
||||||
| `SPRING_PROFILES_ACTIVE` | Spring profile | `dev,e2e` (compose) | YES | — |
|
| `SPRING_PROFILES_ACTIVE` | Spring profile | `dev,e2e` (compose) | YES | — |
|
||||||
|
| `OTEL_EXPORTER_OTLP_ENDPOINT` | OTLP gRPC endpoint for distributed traces (Tempo). Set to `http://tempo:4317` via compose. | `http://localhost:4317` | — | — |
|
||||||
|
| `MANAGEMENT_TRACING_SAMPLING_PROBABILITY` | Micrometer tracing sample rate; overridden to `0.0` in test profile. | `0.1` (compose) / `1.0` (dev) | — | — |
|
||||||
|
|
||||||
### PostgreSQL container
|
### PostgreSQL container
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user