From 5dd74df29339be91439e63181a8c56453649b77f Mon Sep 17 00:00:00 2001 From: Marcel Date: Sat, 16 May 2026 11:20:59 +0200 Subject: [PATCH 1/9] fix(obs): wire Prometheus metrics and Loki job label for Grafana dashboards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three root causes confirmed via live server investigation (issue #604): 1. ManagementWebSecurityAutoConfiguration applied HTTP Basic auth to the management port (8081), causing Prometheus to receive 401 HTML responses instead of metrics. Excluded the auto-config — the Docker network (archiv-net) provides the security boundary for this internal port. 2. promtail-config.yml had no `job` relabel rule. Grafana's Loki dashboards query {job="$app"} which matched nothing; logs were in Loki under compose_service but invisible to every dashboard panel. 3. prometheus.yml had a stale comment claiming the spring-boot target would be DOWN until micrometer-registry-prometheus was added — it has been present in pom.xml for some time. 
Co-Authored-By: Claude Sonnet 4.6 --- .../org/raddatz/familienarchiv/FamilienarchivApplication.java | 4 +++- infra/observability/prometheus/prometheus.yml | 2 -- infra/observability/promtail/promtail-config.yml | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/FamilienarchivApplication.java b/backend/src/main/java/org/raddatz/familienarchiv/FamilienarchivApplication.java index 4fef338f..09227d27 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/FamilienarchivApplication.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/FamilienarchivApplication.java @@ -1,9 +1,11 @@ package org.raddatz.familienarchiv; import org.springframework.boot.SpringApplication; +import org.springframework.boot.actuate.autoconfigure.security.servlet.ManagementWebSecurityAutoConfiguration; import org.springframework.boot.autoconfigure.SpringBootApplication; -@SpringBootApplication +// Excluded: management port (8081) is network-isolated inside archiv-net; no app-level auth needed. +@SpringBootApplication(exclude = {ManagementWebSecurityAutoConfiguration.class}) public class FamilienarchivApplication { public static void main(String[] args) { diff --git a/infra/observability/prometheus/prometheus.yml b/infra/observability/prometheus/prometheus.yml index 38a0f8d6..a29cc75b 100644 --- a/infra/observability/prometheus/prometheus.yml +++ b/infra/observability/prometheus/prometheus.yml @@ -15,8 +15,6 @@ scrape_configs: metrics_path: /actuator/prometheus static_configs: # Uses the Docker service name (not container_name) for reliable DNS resolution. - # Target will show as DOWN until backend instrumentation issue adds - # micrometer-registry-prometheus and exposes the endpoint — this is expected. 
- targets: ['backend:8081'] - job_name: ocr-service diff --git a/infra/observability/promtail/promtail-config.yml b/infra/observability/promtail/promtail-config.yml index b569c22f..b31781a4 100644 --- a/infra/observability/promtail/promtail-config.yml +++ b/infra/observability/promtail/promtail-config.yml @@ -28,3 +28,5 @@ scrape_configs: target_label: 'compose_project' - source_labels: ['__meta_docker_container_log_stream'] target_label: 'logstream' + - source_labels: ['__meta_docker_container_label_com_docker_compose_service'] + target_label: 'job' -- 2.49.1 From 2aa0ff9e704adfc12e875789b8cc49da20195584 Mon Sep 17 00:00:00 2001 From: Marcel Date: Sat, 16 May 2026 12:08:20 +0200 Subject: [PATCH 2/9] fix(obs): wire Prometheus endpoint for Spring Boot 4.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four Spring Boot 4.0-specific issues prevented /actuator/prometheus from working: 1. spring-boot-starter-micrometer-metrics missing — Spring Boot 4.0 splits Micrometer metrics export (including the Prometheus scrape endpoint) out of spring-boot-starter-actuator into its own starter. Added dependency. 2. management.prometheus.metrics.export.enabled not set — Spring Boot 4.0 defaults metrics export to false (opt-in). Added the property to application.yaml. 3. SecurityConfig did not permit /actuator/prometheus — Spring Boot 4.0 with Jetty serves the management port (8081) via the same security filter chain as the main port (8080). The previous commit's exclusion of ManagementWebSecurityAutoConfiguration was a no-op (that class no longer exists in Spring Boot 4.0); removed it and added the correct permitAll() rule. Updated the architecture comment in application.yaml to reflect the true filter-chain behaviour. 4. Reverted invalid FamilienarchivApplication.java change from the prior commit (ManagementWebSecurityAutoConfiguration import compiled against a class that does not exist in the Spring Boot 4.0 BOM). 
Also adds ActuatorPrometheusIT — an integration test that asserts the /actuator/prometheus endpoint returns 200 with jvm_memory_used_bytes without credentials, serving as regression protection against future Spring Boot upgrades silently breaking metrics collection. Co-Authored-By: Claude Sonnet 4.6 --- backend/pom.xml | 5 ++ .../FamilienarchivApplication.java | 4 +- .../security/SecurityConfig.java | 10 +++- backend/src/main/resources/application.yaml | 8 +++- .../familienarchiv/ActuatorPrometheusIT.java | 48 +++++++++++++++++++ 5 files changed, 69 insertions(+), 6 deletions(-) create mode 100644 backend/src/test/java/org/raddatz/familienarchiv/ActuatorPrometheusIT.java diff --git a/backend/pom.xml b/backend/pom.xml index dd0bc03c..0dd83185 100644 --- a/backend/pom.xml +++ b/backend/pom.xml @@ -48,6 +48,11 @@ org.springframework.boot spring-boot-starter-actuator + + + org.springframework.boot + spring-boot-starter-micrometer-metrics + org.springframework.boot spring-boot-starter-validation diff --git a/backend/src/main/java/org/raddatz/familienarchiv/FamilienarchivApplication.java b/backend/src/main/java/org/raddatz/familienarchiv/FamilienarchivApplication.java index 09227d27..4fef338f 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/FamilienarchivApplication.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/FamilienarchivApplication.java @@ -1,11 +1,9 @@ package org.raddatz.familienarchiv; import org.springframework.boot.SpringApplication; -import org.springframework.boot.actuate.autoconfigure.security.servlet.ManagementWebSecurityAutoConfiguration; import org.springframework.boot.autoconfigure.SpringBootApplication; -// Excluded: management port (8081) is network-isolated inside archiv-net; no app-level auth needed. 
-@SpringBootApplication(exclude = {ManagementWebSecurityAutoConfiguration.class}) +@SpringBootApplication public class FamilienarchivApplication { public static void main(String[] args) { diff --git a/backend/src/main/java/org/raddatz/familienarchiv/security/SecurityConfig.java b/backend/src/main/java/org/raddatz/familienarchiv/security/SecurityConfig.java index 298d9fa6..2cf85573 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/security/SecurityConfig.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/security/SecurityConfig.java @@ -54,8 +54,14 @@ public class SecurityConfig { .csrf(csrf -> csrf.disable()) .authorizeHttpRequests(auth -> { - // Health endpoint must be open so CI/Docker health checks work without credentials - auth.requestMatchers("/actuator/health").permitAll(); + // Both /actuator/health and /actuator/prometheus must be open. + // In Spring Boot 4.0 the management server (port 8081) shares the security filter chain; + // network isolation (port 8081 not published in docker-compose) is the security boundary. + // Health and Prometheus must be open — no credentials for Docker health checks or Prometheus scraping. + // Note: in Spring Boot 4.0 the management port shares the security filter chain, + // so these paths must be explicitly permitted here even though they are served on port 8081. + // Network isolation (port 8081 not published in docker-compose) is the outer security boundary. 
+ auth.requestMatchers("/actuator/health", "/actuator/prometheus").permitAll(); // Password reset endpoints are unauthenticated by nature auth.requestMatchers("/api/auth/forgot-password", "/api/auth/reset-password").permitAll(); // Invite-based registration endpoints are public diff --git a/backend/src/main/resources/application.yaml b/backend/src/main/resources/application.yaml index ead3d9e8..5f4fb207 100644 --- a/backend/src/main/resources/application.yaml +++ b/backend/src/main/resources/application.yaml @@ -49,7 +49,8 @@ management: # Management port is separate from the app port so that: # (a) Caddy never proxies /actuator/* (it only routes :8080 → the app port) # (b) Prometheus scrapes backend:8081 directly inside archiv-net, not via Caddy - # (c) Spring Security's session-authenticated filter chain on :8080 never sees actuator requests + # Note: in Spring Boot 4.0 the management port shares the security filter chain; /actuator/health + # and /actuator/prometheus must be explicitly permitted in SecurityConfig — see SecurityConfig.java. 
port: 8081 endpoints: web: @@ -58,6 +59,11 @@ management: endpoint: prometheus: enabled: true + # Spring Boot 4.0: metrics export is disabled by default — explicitly opt in for Prometheus + prometheus: + metrics: + export: + enabled: true health: mail: enabled: false diff --git a/backend/src/test/java/org/raddatz/familienarchiv/ActuatorPrometheusIT.java b/backend/src/test/java/org/raddatz/familienarchiv/ActuatorPrometheusIT.java new file mode 100644 index 00000000..36d91bfa --- /dev/null +++ b/backend/src/test/java/org/raddatz/familienarchiv/ActuatorPrometheusIT.java @@ -0,0 +1,48 @@ +package org.raddatz.familienarchiv; + +import org.junit.jupiter.api.Test; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.web.server.LocalManagementPort; +import org.springframework.context.annotation.Import; +import org.springframework.http.ResponseEntity; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.test.context.bean.override.mockito.MockitoBean; +import org.springframework.web.client.DefaultResponseErrorHandler; +import org.springframework.web.client.RestTemplate; +import software.amazon.awssdk.services.s3.S3Client; + +import java.io.IOException; + +import static org.assertj.core.api.Assertions.assertThat; + +@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT) +@ActiveProfiles("test") +@Import(PostgresContainerConfig.class) +class ActuatorPrometheusIT { + + @LocalManagementPort + private int managementPort; + + @MockitoBean + S3Client s3Client; + + @Test + void prometheus_endpoint_returns_jvm_metrics_without_credentials() { + ResponseEntity response = noThrowTemplate().getForEntity( + "http://localhost:" + managementPort + "/actuator/prometheus", String.class); + + assertThat(response.getStatusCode().value()).isEqualTo(200); + assertThat(response.getBody()).contains("jvm_memory_used_bytes"); + } + + private RestTemplate noThrowTemplate() { + RestTemplate template = 
new RestTemplate(); + template.setErrorHandler(new DefaultResponseErrorHandler() { + @Override + public boolean hasError(org.springframework.http.client.ClientHttpResponse response) throws IOException { + return false; + } + }); + return template; + } +} -- 2.49.1 From e19bd609846e9393afa63da50344e18e3ecec9a7 Mon Sep 17 00:00:00 2001 From: Marcel Date: Sat, 16 May 2026 13:55:28 +0200 Subject: [PATCH 3/9] fix(obs): add management security chain and split Prometheus IT tests - Add @Order(1) managementFilterChain scoped to /actuator/** with explicit 401 entry point, blocking all non-public actuator paths without the form-login redirect that the main chain uses for browser clients. - Split single combined test into two focused assertions (prometheus_endpoint_returns_200_without_credentials, prometheus_endpoint_returns_jvm_metrics). - Add negative regression test: actuator_metrics_requires_authentication verifies that /actuator/metrics returns 401 without credentials. Addresses reviewer concerns from @sara (missing negative test, split assertions) and @nora (dedicated management security layer). 
Co-Authored-By: Claude Sonnet 4.6 --- .../security/SecurityConfig.java | 35 +++++++++++++++---- .../familienarchiv/ActuatorPrometheusIT.java | 17 ++++++++- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/security/SecurityConfig.java b/backend/src/main/java/org/raddatz/familienarchiv/security/SecurityConfig.java index 2cf85573..8b1a45ac 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/security/SecurityConfig.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/security/SecurityConfig.java @@ -3,13 +3,16 @@ package org.raddatz.familienarchiv.security; import lombok.RequiredArgsConstructor; import org.raddatz.familienarchiv.user.CustomUserDetailsService; +import jakarta.servlet.http.HttpServletResponse; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; +import org.springframework.core.annotation.Order; import org.springframework.core.env.Environment; import org.springframework.security.authentication.dao.DaoAuthenticationProvider; import org.springframework.security.config.Customizer; import org.springframework.security.config.annotation.web.builders.HttpSecurity; import org.springframework.security.config.annotation.web.configuration.EnableWebSecurity; +import org.springframework.security.config.annotation.web.configurers.AbstractHttpConfigurer; import org.springframework.security.crypto.bcrypt.BCryptPasswordEncoder; import org.springframework.security.crypto.password.PasswordEncoder; import org.springframework.security.web.SecurityFilterChain; @@ -34,6 +37,28 @@ public class SecurityConfig { return authProvider; } + @Bean + @Order(1) + public SecurityFilterChain managementFilterChain(HttpSecurity http) throws Exception { + http + .securityMatcher("/actuator/**") + .authorizeHttpRequests(auth -> { + // Health and Prometheus are open — Docker health checks and Prometheus scraping need no credentials. 
+ auth.requestMatchers("/actuator/health", "/actuator/prometheus").permitAll(); + // All other actuator endpoints (metrics, info, env, heapdump…) require authentication. + auth.anyRequest().authenticated(); + }) + // Explicitly return 401 for any unauthenticated actuator request. + // Without this override, Spring Security's DelegatingAuthenticationEntryPoint + // would redirect browser-like clients to the form-login page (302 → /login), + // making it impossible to distinguish "not authenticated" from "not found" in tests. + .exceptionHandling(ex -> ex.authenticationEntryPoint( + (req, res, e) -> res.setStatus(HttpServletResponse.SC_UNAUTHORIZED))) + .formLogin(AbstractHttpConfigurer::disable) + .csrf(AbstractHttpConfigurer::disable); + return http.build(); + } + @Bean public SecurityFilterChain securityFilterChain(HttpSecurity http) throws Exception { http @@ -54,13 +79,9 @@ public class SecurityConfig { .csrf(csrf -> csrf.disable()) .authorizeHttpRequests(auth -> { - // Both /actuator/health and /actuator/prometheus must be open. - // In Spring Boot 4.0 the management server (port 8081) shares the security filter chain; - // network isolation (port 8081 not published in docker-compose) is the security boundary. - // Health and Prometheus must be open — no credentials for Docker health checks or Prometheus scraping. - // Note: in Spring Boot 4.0 the management port shares the security filter chain, - // so these paths must be explicitly permitted here even though they are served on port 8081. - // Network isolation (port 8081 not published in docker-compose) is the outer security boundary. + // Actuator endpoints are governed by managementFilterChain (@Order(1)) above. + // The permitAll() lines here are a belt-and-suspenders fallback in case any + // actuator path escapes that chain's securityMatcher. See docs/adr/017. 
auth.requestMatchers("/actuator/health", "/actuator/prometheus").permitAll(); // Password reset endpoints are unauthenticated by nature auth.requestMatchers("/api/auth/forgot-password", "/api/auth/reset-password").permitAll(); diff --git a/backend/src/test/java/org/raddatz/familienarchiv/ActuatorPrometheusIT.java b/backend/src/test/java/org/raddatz/familienarchiv/ActuatorPrometheusIT.java index 36d91bfa..e68ccfc7 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/ActuatorPrometheusIT.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/ActuatorPrometheusIT.java @@ -27,14 +27,29 @@ class ActuatorPrometheusIT { S3Client s3Client; @Test - void prometheus_endpoint_returns_jvm_metrics_without_credentials() { + void prometheus_endpoint_returns_200_without_credentials() { ResponseEntity response = noThrowTemplate().getForEntity( "http://localhost:" + managementPort + "/actuator/prometheus", String.class); assertThat(response.getStatusCode().value()).isEqualTo(200); + } + + @Test + void prometheus_endpoint_returns_jvm_metrics() { + ResponseEntity response = noThrowTemplate().getForEntity( + "http://localhost:" + managementPort + "/actuator/prometheus", String.class); + assertThat(response.getBody()).contains("jvm_memory_used_bytes"); } + @Test + void actuator_metrics_requires_authentication() { + ResponseEntity response = noThrowTemplate().getForEntity( + "http://localhost:" + managementPort + "/actuator/metrics", String.class); + + assertThat(response.getStatusCode().value()).isEqualTo(401); + } + private RestTemplate noThrowTemplate() { RestTemplate template = new RestTemplate(); template.setErrorHandler(new DefaultResponseErrorHandler() { -- 2.49.1 From c2d092f435df4403e8ba69c33820da070aa5d803 Mon Sep 17 00:00:00 2001 From: Marcel Date: Sat, 16 May 2026 13:56:20 +0200 Subject: [PATCH 4/9] =?UTF-8?q?docs(adr):=20add=20ADR-017=20=E2=80=94=20Sp?= =?UTF-8?q?ring=20Boot=204.0=20management=20port=20shares=20main=20securit?= =?UTF-8?q?y=20filter=20chain?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Documents the architectural decision behind the dedicated management SecurityFilterChain, the discovery that SB4+Jetty removed the isolated management child-context security, and the consequences for actuator endpoint exposure. Addresses @markus blocker from PR #606 review. Co-Authored-By: Claude Sonnet 4.6 --- docs/adr/017-management-port-security.md | 48 ++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 docs/adr/017-management-port-security.md diff --git a/docs/adr/017-management-port-security.md b/docs/adr/017-management-port-security.md new file mode 100644 index 00000000..ed60150c --- /dev/null +++ b/docs/adr/017-management-port-security.md @@ -0,0 +1,48 @@ +# ADR-017: Spring Boot 4.0 management port shares the main security filter chain + +## Status + +Accepted + +## Context + +The Familienarchiv backend runs Spring Boot Actuator on a dedicated management port (8081) so that Caddy never proxies `/actuator/*` requests and Prometheus can reach the scrape endpoint directly inside `archiv-net`. + +In earlier Spring Boot versions (< 4.0), the management server ran in an isolated child application context whose security was governed independently by `ManagementWebSecurityAutoConfiguration`. The main app's `SecurityConfig` filter chain (port 8080) never intercepted requests arriving on port 8081. + +In Spring Boot 4.0 with Jetty, this isolation was removed. The management server now traverses the **same** Spring Security `FilterChainProxy` as the main application. Concretely: + +- Any `SecurityFilterChain` bean in the application context is evaluated for requests arriving on the management port. +- There is no longer a separate "management security" child context. 
+ +This was discovered when Prometheus began receiving HTTP 401 responses from `/actuator/prometheus` despite the endpoint being exposed and the `micrometer-registry-prometheus` dependency being present. Prometheus rejected these responses with `received unsupported Content-Type "text/html"` because the main filter chain's form-login `DelegatingAuthenticationEntryPoint` was redirecting unauthenticated requests to `/login` (302 → HTML). + +A secondary issue: Spring Boot 4.0 no longer auto-enables Prometheus metrics export — `management.prometheus.metrics.export.enabled` must be set explicitly, and the Prometheus scrape endpoint requires `spring-boot-starter-micrometer-metrics` (a new starter that was split out in Spring Boot 4.0). + +## Decision + +1. **Dedicated management `SecurityFilterChain`** scoped to `/actuator/**` at `@Order(1)` (highest precedence). This chain: + - `permitAll()` for `/actuator/health` and `/actuator/prometheus` — required for Docker health checks and unauthenticated Prometheus scraping. + - `authenticated()` for all other actuator endpoints — blocks `/actuator/metrics`, `/actuator/info`, etc. without credentials. + - Uses an explicit `401` entry point (not form-login redirect) so that API clients — including Prometheus — receive a machine-readable status code rather than an HTML redirect. + - No CSRF, no form login. + +2. **Belt-and-suspenders `permitAll()` in the main `SecurityFilterChain`** for `/actuator/health` and `/actuator/prometheus`, in case a future configuration change causes these paths to escape the management chain's `securityMatcher`. + +3. **Network isolation as the outer defense boundary.** Port 8081 is not published in `docker-compose.yml` and is not routed through Caddy. Only services inside `archiv-net` (primarily Prometheus and the Docker health checker) can reach the management port. 
+ +## Alternatives rejected + +- **Exclude `ManagementWebSecurityAutoConfiguration`:** This auto-configuration no longer exists in Spring Boot 4.0. Exclusion is not applicable. +- **Keep `SecurityConfig` as the sole filter chain without `@Order(1)` management chain:** The main chain's form-login `DelegatingAuthenticationEntryPoint` redirects browser-like clients to `/login` (302). Prometheus and automated health check clients cannot follow this redirect, so the endpoint would be unreachable without a dedicated chain that returns plain 401 or 200. +- **Per-endpoint `@Order(1)` filter chain using `EndpointRequest.toAnyEndpoint()`:** The `spring-boot-security` artifact that provides `EndpointRequest` is not a transitive dependency of `spring-boot-starter-actuator` in Spring Boot 4.0. Using a path-based `securityMatcher("/actuator/**")` achieves the same scoping without an extra dependency. + +## Consequences + +- All actuator endpoints on port 8081 that are not explicitly `permitAll()`-ed require an authenticated request. Without authentication, the response is 401 (not a redirect). Note: `managementFilterChain` as defined in `SecurityConfig` does not call `httpBasic()`, so HTTP Basic credentials are accepted on these endpoints only once that mechanism is enabled on the chain. +- Adding a new actuator endpoint to `management.endpoints.web.exposure.include` implicitly protects it via `anyRequest().authenticated()` in the management chain — no additional `permitAll()` needed unless intentional. +- A regression test (`ActuatorPrometheusIT`) verifies: + - `/actuator/prometheus` returns 200 without credentials. + - `/actuator/metrics` returns 401 without credentials. + - Prometheus metric names are present in the response body. +- If port 8081 is ever accidentally published in `docker-compose.yml`, actuator endpoints other than health and prometheus still require authentication (401 otherwise). This reduces (but does not eliminate) the risk of inadvertent exposure. 
-- 2.49.1 From bd57310bbf850d32b7933dbe879d562d0a81e3c0 Mon Sep 17 00:00:00 2001 From: Marcel Date: Sat, 16 May 2026 13:56:51 +0200 Subject: [PATCH 5/9] docs(obs): document promtail job label mapping in DEPLOYMENT.md The job label (derived from the Docker Compose service name) is what powers {job="backend"} queries in Loki dashboards and populates the Grafana "App" variable dropdown. Operators need to know this mapping when writing custom Loki queries. Addresses @markus non-blocker suggestion from PR #606 review. Co-Authored-By: Claude Sonnet 4.6 --- docs/DEPLOYMENT.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index e959d504..0a1e4b8a 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -374,7 +374,7 @@ Current services: | `obs-node-exporter` | `prom/node-exporter:v1.9.0` | Host-level CPU / memory / disk / network metrics | | `obs-cadvisor` | `gcr.io/cadvisor/cadvisor:v0.52.1` | Per-container resource metrics | | `obs-loki` | `grafana/loki:3.4.2` | Log aggregation — receives log streams from Promtail. Port 3100 is `expose`-only (not host-bound). | -| `obs-promtail` | `grafana/promtail:3.4.2` | Log shipping agent — reads all Docker container logs via the Docker socket and forwards them to Loki with `container_name`, `compose_service`, and `compose_project` labels | +| `obs-promtail` | `grafana/promtail:3.4.2` | Log shipping agent — reads all Docker container logs via the Docker socket and forwards them to Loki with `container_name`, `compose_service`, `compose_project`, and `job` labels. The `job` label is mapped from the Docker Compose service name (`com.docker.compose.service`) so that Grafana Loki dashboard queries (`{job="backend"}`, `{job="frontend"}`) work out of the box and the "App" variable dropdown is populated. | | `obs-tempo` | `grafana/tempo:2.7.2` | Distributed trace storage — OTLP gRPC receiver on port 4317, OTLP HTTP on port 4318 (both `archiv-net`-internal). 
Grafana queries traces on port 3200 (`obs-net`-internal). All ports are `expose`-only (not host-bound). | | `obs-grafana` | `grafana/grafana-oss:11.6.1` | Unified observability UI — metrics dashboards, log exploration, trace viewer. Bound to `127.0.0.1:${PORT_GRAFANA:-3003}` on the host. | | `obs-glitchtip` | `glitchtip/glitchtip:6.1.6` | Sentry-compatible error tracker. Receives frontend + backend error events, groups by fingerprint, provides issue UI with stack traces. Bound to `127.0.0.1:${PORT_GLITCHTIP:-3002}`. | -- 2.49.1 From 45a992f5a8be266c7c0ccfbe728fac9867af2297 Mon Sep 17 00:00:00 2001 From: Marcel Date: Sat, 16 May 2026 15:18:13 +0200 Subject: [PATCH 6/9] fix(obs): fix OTLP transport port and add application metrics tag - Change OTEL default endpoint from port 4317 (gRPC) to 4318 (HTTP) to match Spring Boot's HttpExporter; sending HTTP/1.1 to a gRPC listener caused "Connection reset" errors - Add otel.logs.exporter=none: Promtail captures Docker logs via the logging driver; sending logs to Tempo's OTLP endpoint (which only handles traces) produced 404 errors - Add management.metrics.tags.application to every metric so Grafana's Spring Boot Observability dashboard (ID 17175) can filter by the application label_values() template variable - Add MANAGEMENT_METRICS_TAGS_APPLICATION and OTEL_LOGS_EXPORTER env vars to docker-compose.prod.yml; production Tempo endpoint already uses 4318 - Add MANAGEMENT_TRACING_SAMPLING_PROBABILITY to prod compose with 0.1 default to avoid 100% trace sampling in production Co-Authored-By: Claude Sonnet 4.6 --- backend/src/main/resources/application.yaml | 12 ++++++++++-- docker-compose.prod.yml | 5 ++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/backend/src/main/resources/application.yaml b/backend/src/main/resources/application.yaml index 5f4fb207..1d502776 100644 --- a/backend/src/main/resources/application.yaml +++ b/backend/src/main/resources/application.yaml @@ -64,6 +64,11 @@ management: metrics: 
export: enabled: true + metrics: + tags: + # Common tag applied to every metric so Grafana's Spring Boot dashboard can filter by application name. + # Override via MANAGEMENT_METRICS_TAGS_APPLICATION env var. + application: ${spring.application.name} health: mail: enabled: false @@ -72,13 +77,16 @@ management: probability: 1.0 # 100% in dev; override via MANAGEMENT_TRACING_SAMPLING_PROBABILITY in prod compose # OpenTelemetry trace export — failures are non-fatal (app starts cleanly without Tempo running) -# The default http://localhost:4317 ensures CI compatibility when no observability stack is present. +# Port 4318 = OTLP HTTP (the default transport for Spring Boot's HttpExporter). +# Port 4317 is gRPC-only; sending HTTP/1.1 to it produces "Connection reset". otel: service: name: familienarchiv-backend exporter: otlp: - endpoint: ${OTEL_EXPORTER_OTLP_ENDPOINT:http://localhost:4317} + endpoint: ${OTEL_EXPORTER_OTLP_ENDPOINT:http://localhost:4318} + logs: + exporter: none # Promtail captures Docker logs; disable OTLP log export to Tempo (which doesn't accept logs) springdoc: api-docs: diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 28730f7e..ff8db357 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -213,7 +213,10 @@ services: APP_MAIL_FROM: ${APP_MAIL_FROM:-noreply@raddatz.cloud} SPRING_MAIL_PROPERTIES_MAIL_SMTP_AUTH: ${MAIL_SMTP_AUTH:-true} SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-true} - OTEL_EXPORTER_OTLP_ENDPOINT: http://tempo:4317 + OTEL_EXPORTER_OTLP_ENDPOINT: http://tempo:4318 + OTEL_LOGS_EXPORTER: none + MANAGEMENT_METRICS_TAGS_APPLICATION: Familienarchiv + MANAGEMENT_TRACING_SAMPLING_PROBABILITY: ${MANAGEMENT_TRACING_SAMPLING_PROBABILITY:-0.1} networks: - archiv-net healthcheck: -- 2.49.1 From cea94ce2608558dd91c2d275a6fc92a7bc9e61fe Mon Sep 17 00:00:00 2001 From: Marcel Date: Sat, 16 May 2026 15:20:40 +0200 Subject: [PATCH 7/9] fix(obs): disable OTLP metric export (Prometheus 
scrapes pull-model) Tempo only handles traces; sending metrics to /v1/metrics returns 404. Prometheus already scrapes Spring Boot metrics via the pull-model at /actuator/prometheus, so OTLP metric push is redundant and noisy. Co-Authored-By: Claude Sonnet 4.6 --- backend/src/main/resources/application.yaml | 4 +++- docker-compose.prod.yml | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/src/main/resources/application.yaml b/backend/src/main/resources/application.yaml index 1d502776..776b2ab1 100644 --- a/backend/src/main/resources/application.yaml +++ b/backend/src/main/resources/application.yaml @@ -86,7 +86,9 @@ otel: otlp: endpoint: ${OTEL_EXPORTER_OTLP_ENDPOINT:http://localhost:4318} logs: - exporter: none # Promtail captures Docker logs; disable OTLP log export to Tempo (which doesn't accept logs) + exporter: none # Promtail captures Docker logs; disable OTLP log export (Tempo only accepts traces) + metrics: + exporter: none # Prometheus scrapes /actuator/prometheus; disable OTLP metric export to Tempo springdoc: api-docs: diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index ff8db357..9fcb453f 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -215,6 +215,7 @@ services: SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-true} OTEL_EXPORTER_OTLP_ENDPOINT: http://tempo:4318 OTEL_LOGS_EXPORTER: none + OTEL_METRICS_EXPORTER: none MANAGEMENT_METRICS_TAGS_APPLICATION: Familienarchiv MANAGEMENT_TRACING_SAMPLING_PROBABILITY: ${MANAGEMENT_TRACING_SAMPLING_PROBABILITY:-0.1} networks: -- 2.49.1 From 7713a03cd53e669346fed4ad9fbe635f5085e85d Mon Sep 17 00:00:00 2001 From: Marcel Date: Sat, 16 May 2026 15:27:50 +0200 Subject: [PATCH 8/9] docs(obs): add OBSERVABILITY.md developer guide and fix stale env var docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - New docs/OBSERVABILITY.md: developer-facing guide with a "where to look for what" table, 
common LogQL queries, trace exploration workflow, log→trace correlation via traceId links, and a signal summary table - Link from DEPLOYMENT.md §4 (ops section now points to dev guide) and from CLAUDE.md Infrastructure section - Fix stale DEPLOYMENT.md env var table: OTEL_EXPORTER_OTLP_ENDPOINT now documents port 4318 (HTTP) not 4317 (gRPC); add the three new env vars wired in this PR (OTEL_LOGS_EXPORTER, OTEL_METRICS_EXPORTER, MANAGEMENT_METRICS_TAGS_APPLICATION) with their rationale - Fix stale obs-tempo service description (port 4318, not 4317) Co-Authored-By: Claude Sonnet 4.6 --- CLAUDE.md | 4 + docs/DEPLOYMENT.md | 10 ++- docs/OBSERVABILITY.md | 180 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 192 insertions(+), 2 deletions(-) create mode 100644 docs/OBSERVABILITY.md diff --git a/CLAUDE.md b/CLAUDE.md index 88db450b..d3acc1a2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -299,6 +299,10 @@ Run via `docker-compose.observability.yml` — requires the main stack to be run | `SENTRY_DSN` | GlitchTip/Sentry DSN for the backend (Spring Boot) — leave empty to disable | | `VITE_SENTRY_DSN` | GlitchTip/Sentry DSN for the frontend (SvelteKit) — injected at build time via Vite | +## Observability + +→ See [docs/OBSERVABILITY.md](./docs/OBSERVABILITY.md) — where to look for logs, traces, metrics, and errors. + ## API Testing HTTP test files are in `backend/api_tests/` for use with the VS Code REST Client extension. diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index 0a1e4b8a..0b852e4d 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -107,7 +107,10 @@ All vars are set in `.env` at the repo root (copy from `.env.example`). The back | `MAIL_SMTP_AUTH` | SMTP auth enabled | `false` (dev) | YES (prod) | — | | `MAIL_STARTTLS_ENABLE` | STARTTLS enabled | `false` (dev) | YES (prod) | — | | `SPRING_PROFILES_ACTIVE` | Spring profile | `dev,e2e` (compose) | YES | — | -| `OTEL_EXPORTER_OTLP_ENDPOINT` | OTLP gRPC endpoint for distributed traces (Tempo). 
Set to `http://tempo:4317` via compose. | `http://localhost:4317` | — | — | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | OTLP HTTP endpoint for distributed traces (Tempo). Port 4318 = HTTP transport; port 4317 is gRPC-only and causes "Connection reset" with Spring Boot's HttpExporter. | `http://localhost:4318` | — | — | +| `OTEL_LOGS_EXPORTER` | Disable OTLP log export — Promtail captures Docker logs via the logging driver; Tempo does not accept logs. | `none` | — | — | +| `OTEL_METRICS_EXPORTER` | Disable OTLP metric export — Prometheus scrapes `/actuator/prometheus` via pull model; Tempo does not accept metrics. | `none` | — | — | +| `MANAGEMENT_METRICS_TAGS_APPLICATION` | Common tag added to every Micrometer metric. Required by Grafana's Spring Boot Observability dashboard (ID 17175) `label_values(application)` template variable. | `Familienarchiv` | — | — | | `MANAGEMENT_TRACING_SAMPLING_PROBABILITY` | Micrometer tracing sample rate; overridden to `0.0` in test profile. | `0.1` (compose) / `1.0` (dev) | — | — | | `SENTRY_DSN` | GlitchTip / Sentry DSN for backend error reporting. Leave empty to disable the SDK. Set after GlitchTip first-run (§4). | — | — | YES | @@ -280,6 +283,9 @@ Before the first deploy: rotate `PROD_APP_ADMIN_PASSWORD` to a strong value. Aft ## 4. Logs + observability +> **Developer guide (where to look for what, LogQL queries, trace exploration) → [docs/OBSERVABILITY.md](./OBSERVABILITY.md).** +> This section covers the ops side: starting the stack, env vars, and CI wiring. + ### First-response commands ```bash @@ -375,7 +381,7 @@ Current services: | `obs-cadvisor` | `gcr.io/cadvisor/cadvisor:v0.52.1` | Per-container resource metrics | | `obs-loki` | `grafana/loki:3.4.2` | Log aggregation — receives log streams from Promtail. Port 3100 is `expose`-only (not host-bound). 
| | `obs-promtail` | `grafana/promtail:3.4.2` | Log shipping agent — reads all Docker container logs via the Docker socket and forwards them to Loki with `container_name`, `compose_service`, `compose_project`, and `job` labels. The `job` label is mapped from the Docker Compose service name (`com.docker.compose.service`) so that Grafana Loki dashboard queries (`{job="backend"}`, `{job="frontend"}`) work out of the box and the "App" variable dropdown is populated. | -| `obs-tempo` | `grafana/tempo:2.7.2` | Distributed trace storage — OTLP gRPC receiver on port 4317, OTLP HTTP on port 4318 (both `archiv-net`-internal). Grafana queries traces on port 3200 (`obs-net`-internal). All ports are `expose`-only (not host-bound). | +| `obs-tempo` | `grafana/tempo:2.7.2` | Distributed trace storage — OTLP HTTP receiver on port 4318 (`archiv-net`-internal; backend sends traces here). Grafana queries traces on port 3200 (`obs-net`-internal). All ports are `expose`-only (not host-bound). | | `obs-grafana` | `grafana/grafana-oss:11.6.1` | Unified observability UI — metrics dashboards, log exploration, trace viewer. Bound to `127.0.0.1:${PORT_GRAFANA:-3003}` on the host. | | `obs-glitchtip` | `glitchtip/glitchtip:6.1.6` | Sentry-compatible error tracker. Receives frontend + backend error events, groups by fingerprint, provides issue UI with stack traces. Bound to `127.0.0.1:${PORT_GLITCHTIP:-3002}`. | | `obs-glitchtip-worker` | `glitchtip/glitchtip:6.1.6` | Celery + beat worker — processes async GlitchTip tasks (event ingestion, notifications, cleanup). | diff --git a/docs/OBSERVABILITY.md b/docs/OBSERVABILITY.md new file mode 100644 index 00000000..b895e849 --- /dev/null +++ b/docs/OBSERVABILITY.md @@ -0,0 +1,180 @@ +# Observability Guide + +> **Ops reference (starting the stack, env vars, CI wiring) → [DEPLOYMENT.md §4](./DEPLOYMENT.md#4-logs--observability).** +> This file is for developers: what signal lives where, how to reach it, and what to look for. 
+ +## Where to look for what + +| I want to… | Go to | +|---|---| +| See the last N log lines from the backend | `docker compose logs --tail=100 backend` | +| Search logs by keyword across time | Grafana → Explore → Loki | +| Understand why an HTTP request failed | Grafana → Explore → Loki → filter by `traceId` → follow link to Tempo | +| See a full distributed trace (DB queries, HTTP calls) | Grafana → Explore → Tempo → search by service or trace ID | +| Check JVM heap / GC / thread count | Grafana → Dashboards → Spring Boot Observability | +| Check HTTP error rate or p95 latency | Grafana → Dashboards → Spring Boot Observability | +| Check host CPU / memory / disk | Grafana → Dashboards → Node Exporter Full | +| See grouped application errors with stack traces | GlitchTip | +| Check if the backend is healthy | `curl http://localhost:8081/actuator/health` (on the server) | +| Check what Prometheus is scraping | `curl http://localhost:9090/api/v1/targets` (on the server) | + +## Access + +| Tool | External URL | Who it's for | +|---|---|---| +| Grafana | `https://grafana.archiv.raddatz.cloud` | Logs, metrics, traces — the primary observability UI | +| GlitchTip | `https://glitchtip.archiv.raddatz.cloud` | Grouped errors with stack traces and release tracking | + +Loki, Tempo, and Prometheus have no external URL. They are internal services, accessible only through Grafana (or via SSH tunnel — see below). + +## Logs (Loki) + +Logs reach Loki via Promtail, which reads all Docker container logs from the Docker socket and ships them with labels derived from Docker Compose metadata. 
+ +### Labels available in every log line + +| Label | What it contains | Example | +|---|---|---| +| `job` | Compose service name | `backend`, `frontend`, `db` | +| `compose_service` | Same as `job` | `backend` | +| `compose_project` | Compose project name | `archiv-staging`, `archiv-production` | +| `container_name` | Docker container name | `archiv-staging-backend-1` | +| `filename` | Docker log source | `/var/lib/docker/containers/…` | + +**Use `job` in LogQL queries** — it is stable across dev, staging, and production. `container_name` changes between environments. + +### Common LogQL queries in Grafana Explore + +```logql +# All backend logs +{job="backend"} + +# Backend ERROR and WARN lines only +{job="backend"} |~ "ERROR|WARN" + +# All logs for a specific request (paste a traceId from a log line) +{job="backend"} |= "3fa85f6457174562b3fc2c963f66afa6" + +# Log lines containing a specific exception class +{job="backend"} |~ "DomainException|NullPointerException" + +# Frontend logs +{job="frontend"} + +# Database (slow query log, if enabled) +{job="db"} +``` + +### Log → Trace correlation + +Spring Boot writes the active `traceId` into every log line when a request is being processed: + +``` +2026-05-16 ... INFO [Familienarchiv,3fa85f64...,1b2c3d4e] o.r.f.document.DocumentService : ... +``` + +In Grafana Explore → Loki, log lines with a `traceId` field show a **Tempo** link. Clicking it opens the full trace in Explore → Tempo without copying and pasting IDs. + +This linking is configured in the Loki datasource provisioning via the `traceId` derived field regex. No manual setup required. + +## Traces (Tempo) + +The backend sends traces to Tempo via OTLP HTTP (port 4318). Every inbound HTTP request and every JPA query produces a span. Spans are linked into traces by the propagated `traceId` header. + +### Finding a trace in Grafana + +**Option A — from a log line:** +1. Grafana → Explore → select *Loki* datasource +2. 
Query `{job="backend"}` and find the failing request +3. Click the **Tempo** link in the log line (appears when `traceId` is present) + +**Option B — by service:** +1. Grafana → Explore → select *Tempo* datasource +2. Query type: **Search** +3. Service name: `familienarchiv-backend` +4. Filter by HTTP status, duration, or operation name as needed + +**Option C — by trace ID:** +1. Grafana → Explore → select *Tempo* datasource +2. Query type: **TraceQL** or **Trace ID** +3. Paste the trace ID + +### What each span type tells you + +| Root span name pattern | What it covers | +|---|---| +| `GET /api/documents`, `POST /api/documents` | Full HTTP request lifecycle | +| `SELECT archiv.*` | A single JPA/JDBC query inside that request | +| `HikariPool.getConnection` | Connection pool wait time | + +A slow `SELECT` span inside an otherwise fast HTTP span pinpoints a missing index. A slow `HikariPool.getConnection` span indicates connection pool exhaustion. + +### Sampling rate + +- **Dev**: 100% of requests are traced (`management.tracing.sampling.probability: 1.0` in `application.yaml`) +- **Staging / Production**: 10% (`MANAGEMENT_TRACING_SAMPLING_PROBABILITY=0.1` in `docker-compose.prod.yml`) + +To find a trace for a specific request in staging/production, either increase the sampling rate temporarily or trigger the request multiple times. + +## Metrics (Prometheus → Grafana) + +Prometheus scrapes the backend management endpoint every 15 s: + +``` +Target: backend:8081/actuator/prometheus +Labels: job="spring-boot", application="Familienarchiv" +``` + +All Spring Boot metrics carry the `application="Familienarchiv"` tag, which is how the Grafana Spring Boot Observability dashboard (ID 17175) filters to this service. 
+ +### Useful Prometheus queries (run on the server or via Grafana Explore → Prometheus) + +```promql +# HTTP error rate (5xx) as a fraction of all requests +sum(rate(http_server_requests_seconds_count{status=~"5.."}[5m])) + / sum(rate(http_server_requests_seconds_count[5m])) + +# p95 response time +histogram_quantile(0.95, sum by (le) ( + rate(http_server_requests_seconds_bucket[5m]) +)) + +# JVM heap used +jvm_memory_used_bytes{area="heap", application="Familienarchiv"} + +# Active DB connections +hikaricp_connections_active +``` + +## Errors (GlitchTip) + +GlitchTip receives errors from both the backend (via Sentry Java SDK) and the frontend (via Sentry JavaScript SDK). It groups events by fingerprint, tracks first/last seen times, and links to the release that introduced the error. + +GlitchTip complements Loki: use GlitchTip when you need **grouped, de-duplicated errors with stack traces and release attribution**; use Loki when you need **raw log lines with full context** or want to search across all log levels. + +## Direct API access (debugging only) + +Loki and Tempo bind no host ports. To reach them directly from your laptop, use an SSH tunnel through the server: + +```bash +# Loki API on localhost:3100 (then query via curl or logcli) +ssh -L 3100:172.20.0.x:3100 root@raddatz.cloud +# Replace 172.20.0.x with the obs-loki container IP: +# docker inspect obs-loki --format '{{(index .NetworkSettings.Networks "archiv-obs-net").IPAddress}}' + +# Tempo API on localhost:3200 — here 172.20.0.x is the obs-tempo container IP (then query via curl or tempo-cli) +ssh -L 3200:172.20.0.x:3200 root@raddatz.cloud +``` + +In practice, Grafana Explore covers all common debugging workflows without needing direct API access. 
+ +## Signal summary + +| Signal | Source | Transport | Storage | UI | +|---|---|---|---|---| +| Application logs | Spring Boot stdout → Docker log driver | Promtail → Loki push API | Loki | Grafana Explore → Loki | +| Distributed traces | Spring Boot OTel agent | OTLP HTTP → Tempo:4318 | Tempo | Grafana Explore → Tempo | +| JVM + HTTP metrics | Spring Actuator `/actuator/prometheus` | Prometheus pull (15 s) | Prometheus | Grafana dashboards | +| Host metrics | node-exporter | Prometheus pull | Prometheus | Grafana → Node Exporter Full | +| Container metrics | cAdvisor | Prometheus pull | Prometheus | Grafana (via Prometheus datasource) | +| Application errors | Sentry SDK | HTTP POST → GlitchTip ingest | GlitchTip DB | GlitchTip UI | -- 2.49.1 From e61409773e337019056150638d7b08269a3016fa Mon Sep 17 00:00:00 2001 From: Marcel Date: Sat, 16 May 2026 15:44:27 +0200 Subject: [PATCH 9/9] docs(c4): fix Tempo OTLP transport in l2-containers diagram Port 4317 is gRPC; the backend uses HttpExporter (HTTP/1.1) and sends to port 4318. Update Container description and Rel label to match. Co-Authored-By: Claude Sonnet 4.6 --- docs/architecture/c4/l2-containers.puml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/architecture/c4/l2-containers.puml b/docs/architecture/c4/l2-containers.puml index 02167cdb..346efe75 100644 --- a/docs/architecture/c4/l2-containers.puml +++ b/docs/architecture/c4/l2-containers.puml @@ -23,7 +23,7 @@ System_Boundary(observability, "Observability Stack (/opt/familienarchiv/docker- Container(cadvisor, "cAdvisor", "gcr.io/cadvisor/cadvisor:v0.52.1", "Per-container resource metrics.") Container(loki, "Loki", "grafana/loki:3.4.2", "Stores log streams from all containers.") Container(promtail, "Promtail", "grafana/promtail:3.4.2", "Ships Docker container logs to Loki via Docker SD.") - Container(tempo, "Tempo", "grafana/tempo:2.7.2", "Distributed trace storage. OTLP gRPC receiver on port 4317 (archiv-net). 
Grafana queries traces on port 3200 (obs-net). All ports internal only.") + Container(tempo, "Tempo", "grafana/tempo:2.7.2", "Distributed trace storage. OTLP HTTP receiver on port 4318 (archiv-net). Grafana queries traces on port 3200 (obs-net). All ports internal only.") Container(grafana, "Grafana", "grafana/grafana-oss:11.6.1", "Unified observability UI — dashboards, logs, traces. Datasources (Prometheus, Loki, Tempo) and three dashboards are auto-provisioned.") Container(glitchtip, "GlitchTip", "glitchtip/glitchtip:6.1.6", "Sentry-compatible error tracker — web process. Receives frontend + backend error events, groups by fingerprint, provides issue UI with stack traces.") Container(obs_glitchtip_worker, "GlitchTip Worker", "glitchtip/glitchtip:6.1.6", "Celery + beat worker — async event ingestion, notifications, cleanup.") @@ -42,7 +42,7 @@ Rel(backend, mail, "Sends notification and password-reset emails (optional)", "S Rel(ocr, storage, "Fetches PDF via presigned URL", "HTTP / S3 presigned") Rel(mc, storage, "Bootstraps bucket + service account on startup", "MinIO Client CLI") Rel(promtail, loki, "Pushes log streams", "HTTP/Loki push API") -Rel(backend, tempo, "Sends distributed traces via OTLP", "gRPC / OTLP / port 4317 (archiv-net)") +Rel(backend, tempo, "Sends distributed traces via OTLP", "HTTP / OTLP / port 4318 (archiv-net)") Rel(grafana, prometheus, "Queries metrics", "HTTP 9090") Rel(grafana, loki, "Queries logs", "HTTP 3100") Rel(grafana, tempo, "Queries traces", "HTTP 3200") -- 2.49.1