docs(ci): document workspace bind-mount setup for DooD runners

Add the /srv/gitea-workspace prerequisite step to DEPLOYMENT.md §3.1 and a new "Workspace bind-mount setup" subsection plus failure mode 4 to ci-gitea.md, covering the root cause, one-time host setup, disk management, and troubleshooting for the bind-mount resolution fix introduced in ADR-015.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
docs(adr): add ADR-015 for DooD workspace bind-mount approach
2026-05-15 19:46:54 +02:00 · 2026-05-15 19:38:18 +02:00 · 2026-05-15 19:36:55 +02:00 · 2026-05-15 19:33:36 +02:00 · 2026-05-15 19:02:53 +02:00 · 2026-05-15 18:07:32 +02:00
21 changed files with 300 additions and 13 deletions
--- a/.env.example
+++ b/.env.example
@@ -50,6 +50,7 @@ GLITCHTIP_SECRET_KEY=changeme-generate-a-real-secret
 # Error reporting DSNs — leave empty to disable the SDK (safe default).
 # SENTRY_DSN: backend (Spring Boot) — used by the GlitchTip/Sentry Java SDK
 SENTRY_DSN=
+SENTRY_TRACES_SAMPLE_RATE=
 # VITE_SENTRY_DSN: frontend (SvelteKit) — injected at build time via Vite
 VITE_SENTRY_DSN=
 # Sentry/GlitchTip auth token for source map upload at build time (optional)
--- a/.gitea/workflows/ci.yml
+++ b/.gitea/workflows/ci.yml
@@ -305,6 +305,7 @@ jobs:
          MAIL_PORT=1025
          APP_MAIL_FROM=noreply@local
          IMPORT_HOST_DIR=/tmp/dummy-import
+          COMPOSE_NETWORK_NAME=test-idem-archiv-net
          EOF

      - name: Bring up minio
--- a/.gitea/workflows/nightly.yml
+++ b/.gitea/workflows/nightly.yml
@@ -30,6 +30,9 @@ name: nightly
 #   STAGING_OCR_TRAINING_TOKEN
 #   STAGING_APP_ADMIN_USERNAME
 #   STAGING_APP_ADMIN_PASSWORD
+#   GRAFANA_ADMIN_PASSWORD
+#   GLITCHTIP_SECRET_KEY
+#   SENTRY_DSN                  (set after GlitchTip first-run; empty = Sentry disabled)

 on:
  schedule:
@@ -74,6 +77,14 @@ jobs:
          MAIL_STARTTLS_ENABLE=false
          APP_MAIL_FROM=noreply@staging.raddatz.cloud
          IMPORT_HOST_DIR=/srv/familienarchiv-staging/import
+          POSTGRES_USER=archiv
+          PORT_GRAFANA=3003
+          PORT_GLITCHTIP=3002
+          PORT_PROMETHEUS=9090
+          GRAFANA_ADMIN_PASSWORD=${{ secrets.GRAFANA_ADMIN_PASSWORD }}
+          GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }}
+          GLITCHTIP_DOMAIN=https://glitchtip.archiv.raddatz.cloud
+          SENTRY_DSN=${{ secrets.SENTRY_DSN }}
          EOF

      - name: Verify backend /import:ro mount is wired
@@ -120,6 +131,13 @@ jobs:
            --profile staging \
            up -d --wait --remove-orphans

+      - name: Start observability stack
+        # Brings up the services defined in docker-compose.observability.yml,
+        # reading variables from .env.staging, and waits until they are
+        # running/healthy (--wait).
+        # NOTE(review): no project name (-p) is passed here. --remove-orphans
+        # removes containers of the same Compose project that are not defined
+        # in the given file — confirm the observability file resolves to its
+        # own project name so this step cannot remove the application stack's
+        # containers (and vice versa).
+        run: |
+          docker compose \
+            -f docker-compose.observability.yml \
+            --env-file .env.staging \
+            up -d --wait --remove-orphans
+
      - name: Reload Caddy
        # Apply any committed Caddyfile changes before smoke-testing the
        # public surface. Without this step, a Caddyfile edit lands in the
--- a/.gitea/workflows/release.yml
+++ b/.gitea/workflows/release.yml
@@ -34,6 +34,9 @@ name: release
 #   MAIL_PORT
 #   MAIL_USERNAME
 #   MAIL_PASSWORD
+#   GRAFANA_ADMIN_PASSWORD
+#   GLITCHTIP_SECRET_KEY
+#   SENTRY_DSN                    (set after GlitchTip first-run; empty = Sentry disabled)

 on:
  push:
@@ -72,6 +75,14 @@ jobs:
          MAIL_STARTTLS_ENABLE=true
          APP_MAIL_FROM=noreply@raddatz.cloud
          IMPORT_HOST_DIR=/srv/familienarchiv-production/import
+          POSTGRES_USER=archiv
+          PORT_GRAFANA=3003
+          PORT_GLITCHTIP=3002
+          PORT_PROMETHEUS=9090
+          GRAFANA_ADMIN_PASSWORD=${{ secrets.GRAFANA_ADMIN_PASSWORD }}
+          GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }}
+          GLITCHTIP_DOMAIN=https://glitchtip.archiv.raddatz.cloud
+          SENTRY_DSN=${{ secrets.SENTRY_DSN }}
          EOF

      - name: Build images
@@ -93,6 +104,13 @@ jobs:
            --env-file .env.production \
            up -d --wait --remove-orphans

+      - name: Start observability stack
+        # Brings up the services defined in docker-compose.observability.yml,
+        # reading variables from .env.production, and waits until they are
+        # running/healthy (--wait).
+        # NOTE(review): no project name (-p) is passed here. --remove-orphans
+        # removes containers of the same Compose project that are not defined
+        # in the given file — confirm the observability file resolves to its
+        # own project name so this step cannot remove the application stack's
+        # containers (and vice versa).
+        run: |
+          docker compose \
+            -f docker-compose.observability.yml \
+            --env-file .env.production \
+            up -d --wait --remove-orphans
+
      - name: Reload Caddy
        # See nightly.yml — same rationale and mechanism: DooD job containers
        # cannot call systemctl directly; nsenter via a privileged sibling
--- a/backend/pom.xml
+++ b/backend/pom.xml
@@ -29,6 +29,20 @@
 	<properties>
 		<java.version>21</java.version>
 	</properties>
+	<dependencyManagement>
+		<dependencies>
+			<!-- opentelemetry-spring-boot-starter:2.27.0 was built against opentelemetry-api:1.61.0,
+			     but Spring Boot 4.0.0 BOM only manages 1.55.0 (missing GlobalOpenTelemetry.getOrNoop()).
+			     Import the core OTel BOM here to override it before the Spring Boot BOM applies. -->
+			<dependency>
+				<groupId>io.opentelemetry</groupId>
+				<artifactId>opentelemetry-bom</artifactId>
+				<version>1.61.0</version>
+				<type>pom</type>
+				<scope>import</scope>
+			</dependency>
+		</dependencies>
+	</dependencyManagement>
 	<dependencies>
 		<dependency>
 			<groupId>org.springframework.boot</groupId>
@@ -224,6 +238,15 @@
 				</exclusion>
 			</exclusions>
 		</dependency>
+
+		<!-- Sentry error reporting (GlitchTip-compatible) — sentry-spring-boot-4 is the
+		     Spring Boot 4 / Spring Framework 7 compatible module (replaces the jakarta starter
+		     which crashes with SF7 due to bean-name generation for triply-nested @Import classes) -->
+		<dependency>
+			<groupId>io.sentry</groupId>
+			<artifactId>sentry-spring-boot-4</artifactId>
+			<version>8.41.0</version>
+		</dependency>
 	</dependencies>


--- a/backend/src/main/java/org/raddatz/familienarchiv/exception/GlobalExceptionHandler.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/exception/GlobalExceptionHandler.java
@@ -2,6 +2,7 @@ package org.raddatz.familienarchiv.exception;

 import java.util.stream.Collectors;

+import io.sentry.Sentry;
 import jakarta.validation.ConstraintViolationException;
 import org.raddatz.familienarchiv.exception.DomainException;
 import org.raddatz.familienarchiv.exception.ErrorCode;
@@ -63,6 +64,7 @@ public class GlobalExceptionHandler {

    @ExceptionHandler(Exception.class)
    public ResponseEntity<ErrorResponse> handleGeneric(Exception ex) {
+        Sentry.captureException(ex);
        log.error("Unhandled exception", ex);
        return ResponseEntity.internalServerError()
            .body(new ErrorResponse(ErrorCode.INTERNAL_ERROR, "An unexpected error occurred"));
--- a/backend/src/main/resources/application.yaml
+++ b/backend/src/main/resources/application.yaml
@@ -118,3 +118,12 @@ ocr:
  sender-model:
    activation-threshold: 100
    retrain-delta: 50
+
+# Sentry SDK settings (events are sent to the GlitchTip instance behind SENTRY_DSN).
+sentry:
+  # Empty when SENTRY_DSN is unset; an empty DSN leaves the SDK disabled.
+  dsn: ${SENTRY_DSN:}
+  # Reported environment follows the active Spring profile(s); "dev" when none is set.
+  environment: ${SPRING_PROFILES_ACTIVE:dev}
+  # Tracing is controlled solely by this sample rate (0.0 = off, 1.0 = every
+  # transaction). The former `enable-tracing` switch is not a Sentry Java 8.x
+  # option, so it is intentionally not set here.
+  traces-sample-rate: ${SENTRY_TRACES_SAMPLE_RATE:1.0}
+  send-default-pii: false
+  # Domain-level exceptions are not reported as Sentry events.
+  ignored-exceptions-for-type:
+    - org.raddatz.familienarchiv.exception.DomainException
--- a/backend/src/test/java/org/raddatz/familienarchiv/ApplicationContextTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/ApplicationContextTest.java
@@ -1,14 +1,18 @@
 package org.raddatz.familienarchiv;

 import org.junit.jupiter.api.Test;
+import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.boot.test.context.SpringBootTest;
 import org.springframework.boot.testcontainers.service.connection.ServiceConnection;
+import org.springframework.context.ApplicationContext;
 import org.springframework.context.annotation.Import;
 import org.springframework.test.context.ActiveProfiles;
 import org.springframework.test.context.bean.override.mockito.MockitoBean;
 import org.testcontainers.containers.PostgreSQLContainer;
 import software.amazon.awssdk.services.s3.S3Client;

+import static org.assertj.core.api.Assertions.assertThat;
+
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
@ActiveProfiles("test")
@Import(PostgresContainerConfig.class)
@@ -17,9 +21,18 @@ class ApplicationContextTest {
    @MockitoBean
    S3Client s3Client;

+    @Autowired
+    ApplicationContext ctx;
+
    @Test
    void contextLoads() {
        // verifies that the Spring context starts successfully with all beans wired,
        // Flyway migrations applied, and no configuration errors
    }
+
+    /**
+     * Asserts that the Sentry SDK reports itself as disabled in the test context,
+     * so test runs do not send events.
+     * NOTE(review): application.yaml resolves {@code sentry.dsn} from {@code ${SENTRY_DSN:}},
+     * so this presumably fails when SENTRY_DSN is exported in the environment that runs
+     * the tests — confirm CI leaves it unset.
+     */
+    @Test
+    void sentry_is_disabled_when_no_dsn_is_configured() {
+        // application-test.yaml has no sentry.dsn — SDK must stay inactive so tests are clean
+        assertThat(io.sentry.Sentry.isEnabled()).isFalse();
+    }
 }
--- a/backend/src/test/java/org/raddatz/familienarchiv/audit/AuditServiceIntegrationTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/audit/AuditServiceIntegrationTest.java
@@ -1,11 +1,11 @@
 package org.raddatz.familienarchiv.audit;

+import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 import org.raddatz.familienarchiv.PostgresContainerConfig;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.boot.test.context.SpringBootTest;
 import org.springframework.context.annotation.Import;
-import org.springframework.test.annotation.DirtiesContext;
 import org.springframework.test.context.ActiveProfiles;
 import org.springframework.test.context.bean.override.mockito.MockitoBean;
 import org.springframework.transaction.support.TransactionTemplate;
@@ -18,7 +18,6 @@ import static org.awaitility.Awaitility.await;
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
@ActiveProfiles("test")
@Import(PostgresContainerConfig.class)
-@DirtiesContext(classMode = DirtiesContext.ClassMode.AFTER_EACH_TEST_METHOD)
 class AuditServiceIntegrationTest {

    @MockitoBean S3Client s3Client;
@@ -26,6 +25,11 @@ class AuditServiceIntegrationTest {
    @Autowired AuditLogRepository auditLogRepository;
    @Autowired TransactionTemplate transactionTemplate;

+    /**
+     * Deletes every audit-log row before each test method so that rows written by a
+     * previous test cannot influence the current one. This class no longer uses
+     * {@code @DirtiesContext} per test method, so state is reset here instead.
+     */
+    @BeforeEach
+    void resetAuditLog() {
+        auditLogRepository.deleteAll();
+    }
+
    @Test
    void logAfterCommit_writes_ANNOTATION_CREATED_row_after_transaction_commits() {
        transactionTemplate.execute(status -> {
--- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchPagedIntegrationTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchPagedIntegrationTest.java
@@ -12,9 +12,9 @@ import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.boot.test.context.SpringBootTest;
 import org.springframework.context.annotation.Import;
 import org.springframework.data.domain.PageRequest;
-import org.springframework.test.annotation.DirtiesContext;
 import org.springframework.test.context.ActiveProfiles;
 import org.springframework.test.context.bean.override.mockito.MockitoBean;
+import org.springframework.transaction.annotation.Transactional;
 import software.amazon.awssdk.services.s3.S3Client;

 import java.time.LocalDate;
@@ -33,7 +33,7 @@ import static org.assertj.core.api.Assertions.assertThat;
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
@ActiveProfiles("test")
@Import(PostgresContainerConfig.class)
-@DirtiesContext(classMode = DirtiesContext.ClassMode.AFTER_EACH_TEST_METHOD)
+@Transactional
 class DocumentSearchPagedIntegrationTest {

    private static final int FIXTURE_SIZE = 120;
--- a/backend/src/test/java/org/raddatz/familienarchiv/exception/GlobalExceptionHandlerTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/exception/GlobalExceptionHandlerTest.java
@@ -0,0 +1,33 @@
+package org.raddatz.familienarchiv.exception;
+
+import io.sentry.Sentry;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.InjectMocks;
+import org.mockito.MockedStatic;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.springframework.http.ResponseEntity;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.Mockito.mockStatic;
+
+/**
+ * Unit test for {@link GlobalExceptionHandler}: checks that the catch-all handler
+ * forwards the exception to Sentry and answers with a generic 500 error body.
+ */
+@ExtendWith(MockitoExtension.class)
+class GlobalExceptionHandlerTest {
+
+    @InjectMocks
+    private GlobalExceptionHandler handler;
+
+    @Test
+    void handleGeneric_captures_exception_in_sentry_and_returns_500() {
+        RuntimeException failure = new RuntimeException("unexpected failure");
+
+        // Sentry's static API is replaced by a mock only inside this try block.
+        try (MockedStatic<Sentry> sentry = mockStatic(Sentry.class)) {
+            ResponseEntity<GlobalExceptionHandler.ErrorResponse> result = handler.handleGeneric(failure);
+            GlobalExceptionHandler.ErrorResponse body = result.getBody();
+
+            // The exact exception instance must have been handed to Sentry.
+            sentry.verify(() -> Sentry.captureException(failure));
+
+            // The client only sees a generic internal-error payload.
+            assertThat(result.getStatusCode().value()).isEqualTo(500);
+            assertThat(body).isNotNull();
+            assertThat(body.code()).isEqualTo(ErrorCode.INTERNAL_ERROR);
+        }
+    }
+}
--- a/backend/src/test/java/org/raddatz/familienarchiv/geschichte/GeschichteServiceIntegrationTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/geschichte/GeschichteServiceIntegrationTest.java
@@ -19,9 +19,9 @@ import org.springframework.context.annotation.Import;
 import org.springframework.security.authentication.UsernamePasswordAuthenticationToken;
 import org.springframework.security.core.authority.SimpleGrantedAuthority;
 import org.springframework.security.core.context.SecurityContextHolder;
-import org.springframework.test.annotation.DirtiesContext;
 import org.springframework.test.context.ActiveProfiles;
 import org.springframework.test.context.bean.override.mockito.MockitoBean;
+import org.springframework.transaction.annotation.Transactional;
 import software.amazon.awssdk.services.s3.S3Client;

 import java.util.List;
@@ -32,7 +32,7 @@ import static org.assertj.core.api.Assertions.assertThat;
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
@ActiveProfiles("test")
@Import(PostgresContainerConfig.class)
-@DirtiesContext(classMode = DirtiesContext.ClassMode.AFTER_EACH_TEST_METHOD)
+@Transactional
 class GeschichteServiceIntegrationTest {

    @MockitoBean
--- a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonServiceIntegrationTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonServiceIntegrationTest.java
@@ -8,9 +8,9 @@ import org.raddatz.familienarchiv.person.PersonRepository;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.boot.test.context.SpringBootTest;
 import org.springframework.context.annotation.Import;
-import org.springframework.test.annotation.DirtiesContext;
 import org.springframework.test.context.ActiveProfiles;
 import org.springframework.test.context.bean.override.mockito.MockitoBean;
+import org.springframework.transaction.annotation.Transactional;
 import software.amazon.awssdk.services.s3.S3Client;

 import static org.assertj.core.api.Assertions.assertThat;
@@ -18,7 +18,7 @@ import static org.assertj.core.api.Assertions.assertThat;
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
@ActiveProfiles("test")
@Import(PostgresContainerConfig.class)
-@DirtiesContext(classMode = DirtiesContext.ClassMode.AFTER_EACH_TEST_METHOD)
+@Transactional
 class PersonServiceIntegrationTest {

    @MockitoBean S3Client s3Client;
--- a/docker-compose.observability.yml
+++ b/docker-compose.observability.yml
@@ -184,7 +184,7 @@ services:
      - obs-net

  obs-glitchtip:
-    image: glitchtip/glitchtip:v4
+    image: glitchtip/glitchtip:6.1.6
    container_name: obs-glitchtip
    restart: unless-stopped
    depends_on:
@@ -207,7 +207,7 @@ services:
      - obs-net

  obs-glitchtip-worker:
-    image: glitchtip/glitchtip:v4
+    image: glitchtip/glitchtip:6.1.6
    container_name: obs-glitchtip-worker
    restart: unless-stopped
    command: ./bin/run-celery-with-beat.sh
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -39,6 +39,7 @@
 networks:
   archiv-net:
     driver: bridge
+    # Pins the Docker network name instead of letting Compose prefix it with
+    # the project name; COMPOSE_NETWORK_NAME overrides it, default "archiv-net".
+    # NOTE(review): every Compose project that uses this file without setting
+    # COMPOSE_NETWORK_NAME attaches to the same "archiv-net" network — confirm
+    # that sharing one network between such stacks (e.g. staging and
+    # production on one host) is intended.
+    name: ${COMPOSE_NETWORK_NAME:-archiv-net}

 volumes:
  postgres-data:
@@ -212,10 +213,11 @@ services:
      APP_MAIL_FROM: ${APP_MAIL_FROM:-noreply@raddatz.cloud}
      SPRING_MAIL_PROPERTIES_MAIL_SMTP_AUTH: ${MAIL_SMTP_AUTH:-true}
      SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-true}
+      OTEL_EXPORTER_OTLP_ENDPOINT: http://tempo:4317
    networks:
      - archiv-net
    healthcheck:
-      test: ["CMD-SHELL", "wget -qO- http://localhost:8080/actuator/health | grep -q UP || exit 1"]
+      test: ["CMD-SHELL", "wget -qO- http://localhost:8081/actuator/health | grep -q UP || exit 1"]
      interval: 15s
      timeout: 5s
      retries: 10
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -147,6 +147,8 @@ services:
      SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-false}
      APP_OCR_BASE_URL: http://ocr-service:8000
      APP_OCR_TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"
+      SENTRY_DSN: ${SENTRY_DSN:-}
+      SENTRY_TRACES_SAMPLE_RATE: ${SENTRY_TRACES_SAMPLE_RATE:-1.0}
      # Observability: send traces to Tempo inside archiv-net (OTLP gRPC port 4317)
      # Tempo is defined in docker-compose.observability.yml (future issue).
      # OTLP failures are non-fatal — backend starts cleanly without the observability stack.
--- a/docs/DEPLOYMENT.md
+++ b/docs/DEPLOYMENT.md
@@ -193,6 +193,15 @@ curl -fsSL https://tailscale.com/install.sh | sh && tailscale up
 # files to disk during execution (cleaned up unconditionally on completion).
 # A multi-tenant runner would need to switch to stdin-piped env files.
 # (See https://docs.gitea.com/usage/actions/quickstart for the register step.)
+
+# Runner workspace directory — required for DooD bind-mount resolution (ADR-015).
+# act_runner stores job workspaces here so that docker compose bind mounts resolve
+# to real host paths. The path must be identical on the host and inside job containers.
+mkdir -p /srv/gitea-workspace
+# Also add this volume line to the runner service in ~/docker/gitea/compose.yaml:
+#   volumes:
+#     - /srv/gitea-workspace:/srv/gitea-workspace
+# See runner-config.yaml (workdir_parent + valid_volumes + options) and ADR-015.
 ```

 ### 3.2 DNS records
@@ -223,6 +232,9 @@ git.raddatz.cloud      A   <server IP>
 | `MAIL_PORT` | release.yml | typically `587` |
 | `MAIL_USERNAME` | release.yml | SMTP user |
 | `MAIL_PASSWORD` | release.yml | SMTP password |
+| `GRAFANA_ADMIN_PASSWORD` | both | Grafana `admin` login — generate a strong password |
+| `GLITCHTIP_SECRET_KEY` | both | Django secret key — `openssl rand -hex 32` |
+| `SENTRY_DSN` | both | GlitchTip project DSN — set after first-run (§4); leave empty to keep Sentry disabled |

 ### 3.4 First deploy

--- a/docs/adr/015-dood-workspace-bind-mount.md
+++ b/docs/adr/015-dood-workspace-bind-mount.md
@@ -0,0 +1,69 @@
+# ADR-015: DooD workspace bind mount for Compose file bind-mount resolution
+
+## Status
+
+Accepted
+
+## Context
+
+The deploy workflows (`.gitea/workflows/nightly.yml`, `release.yml`) run job steps inside Docker containers via Docker-out-of-Docker (DooD): the Gitea runner mounts the host Docker socket, and act_runner spawns sibling containers for each job.
+
+When a job step calls `docker compose -f docker-compose.observability.yml up`, Docker Compose resolves relative bind-mount sources against `$(pwd)` inside the job container and passes the resulting absolute paths to the **host** daemon. For example, `./infra/observability/prometheus/prometheus.yml` becomes `/some/path/infra/observability/prometheus/prometheus.yml`, and the host daemon tries to bind-mount that path from the **host filesystem**.
+
+In the default DooD setup (`runner-config.yaml` with only `valid_volumes: ["/var/run/docker.sock"]`), job container workspaces live in the act_runner overlay2 layer. The host has no corresponding directory at the job container's `$(pwd)` path, so the daemon auto-creates an empty directory in its place. The container then fails to start because the mount target was expected to be a file, not a directory:
+
+```
+error mounting "…/prometheus/prometheus.yml" to rootfs at "/etc/prometheus/prometheus.yml": not a directory
+```
+
+This affected all five config file bind mounts in `docker-compose.observability.yml`.
+
+## Decision
+
+Configure act_runner to store job workspaces on a real host path (`/srv/gitea-workspace`) and mount that path into both the runner container and every job container at the **same absolute path**. The identity of the host path and container path is the key constraint: Compose resolves to an absolute path and hands it to the host daemon, which looks for that exact path on the host filesystem.
+
+**runner-config.yaml changes:**
+
+```yaml
+container:
+  workdir_parent: /srv/gitea-workspace
+  valid_volumes:
+    - "/var/run/docker.sock"
+    - "/srv/gitea-workspace"
+  options: "-v /srv/gitea-workspace:/srv/gitea-workspace"
+```
+
+**Runner compose.yaml change** (host side — not in this repo):
+
+```yaml
+runner:
+  volumes:
+    - /srv/gitea-workspace:/srv/gitea-workspace
+```
+
+With this in place, `$(pwd)` inside a job container resolves to `/srv/gitea-workspace/<owner>/<repo>/`, which is a real directory on the host. Compose-managed bind mounts from that directory work without any additional steps.
+
+## Alternatives Considered
+
+| Alternative | Why rejected |
+|---|---|
+| **overlay2 `MergedDir` sync via privileged nsenter** (the previous approach, see PR #599 v1) | Required `--privileged --pid=host` (effective root on the host) plus fragile overlay2 driver assumption. Introduced stale-file risk on the host and a second stable path (`/srv/familienarchiv-*/obs-configs`) to maintain separately from the source tree. Replaced by this ADR. |
+| **Build configs into a dedicated Docker image** (pattern used for MinIO bootstrap, see `infra/minio/Dockerfile`) | Viable for static files that change infrequently. Requires a build step and an image rebuild every time a config changes. Appropriate for bootstrap scripts; too heavy for frequently-tuned observability configs. |
+| **Add workspace directory to runner-config `valid_volumes` only** (without `workdir_parent`) | `valid_volumes` whitelists paths that workflow steps may reference, but does not change where act_runner stores workspaces. Without `workdir_parent`, the workspace would still be in overlay2 and the bind-mount resolution problem would remain. |
+| **Map workspace under a different host path than container path** (e.g. host `/srv/workspace`, container `/workspace`) | Compose resolves to the container-internal path (e.g. `/workspace/…`) and passes that to the host daemon. The host daemon interprets the source as a host path. If host `/workspace` does not exist, the daemon creates an empty directory — the original bug. The paths must be identical. |
+
+## Consequences
+
+- `/srv/gitea-workspace` must exist on the VPS before the runner starts. It was created by hand on the current VPS as part of this change; nothing creates it automatically, so a fresh host needs the `mkdir -p` step first.
+- The runner container's `compose.yaml` (maintained outside this repo at `~/docker/gitea/compose.yaml` on the VPS) must include the `- /srv/gitea-workspace:/srv/gitea-workspace` volume line. This is an out-of-band operational dependency; the prerequisite is documented in `runner-config.yaml`.
+- `workdir_parent` applies to all jobs on this runner. Any future workflow that calls `docker compose` with relative bind mounts benefits automatically without further configuration.
+- Job workspaces persist across runs under `/srv/gitea-workspace`. act_runner manages per-run subdirectory cleanup. Orphaned directories from interrupted runs should be cleaned up manually if disk space becomes a concern.
+- Workflows that previously relied on `OBS_CONFIG_DIR` env var or the `obs-configs` stable path on the host no longer need those. Both were removed in this PR.
+- This pattern does **not** apply to the `nsenter`-based Caddy reload step (ADR-012), which manages a host systemd service — a different problem class with no bind-mount equivalent.
+
+## References
+
+- ADR-011 — single-tenant runner trust model
+- ADR-012 — nsenter via privileged container for host service management
+- Issue #598 — original observability stack bind-mount failure
+- `runner-config.yaml` — `workdir_parent`, `valid_volumes`, `options`
--- a/docs/infrastructure/ci-gitea.md
+++ b/docs/infrastructure/ci-gitea.md
@@ -19,6 +19,39 @@ Both containers live in the `gitea_gitea` Docker network on the VPS. The runner

 The `gitea-runner` container mounts the host Docker socket (`/var/run/docker.sock`). When a workflow job runs, act_runner spawns a **sibling container** for each job. That job container also gets the Docker socket mounted (via `valid_volumes` in `runner-config.yaml`), enabling `docker compose` calls in workflow steps.

+### Workspace bind-mount setup (DooD path resolution)
+
+When a workflow step calls `docker compose up` with relative bind-mount sources (e.g. `./infra/observability/prometheus/prometheus.yml`), Compose resolves them against `$(pwd)` inside the job container and passes the resulting **absolute path** to the host Docker daemon. The host daemon then tries to bind-mount that path from the **host filesystem**.
+
+In the default DooD setup the job container's workspace lives in the act_runner overlay2 layer — the host has no directory at that path, auto-creates an empty one, and the container fails with:
+
+```
+error mounting "…/prometheus/prometheus.yml" to rootfs at "/etc/prometheus/prometheus.yml": not a directory
+```
+
+**Solution (ADR-015):** store job workspaces on a real host path and mount it at the **same absolute path** inside the runner and every job container. `runner-config.yaml` configures this via `workdir_parent`, `valid_volumes`, and `options`.
+
+**One-time host setup** (required on any fresh VPS):
+
+```bash
+mkdir -p /srv/gitea-workspace
+# Then add to the runner service in ~/docker/gitea/compose.yaml:
+#   volumes:
+#     - /srv/gitea-workspace:/srv/gitea-workspace
+# Restart the runner container for the change to take effect.
+```
+
+The path `/srv/gitea-workspace` is the canonical workspace root. It must be identical on the host and inside job containers — if the paths differ, Compose still resolves to the container-internal path, which the host daemon cannot find (the original bug).
+
+**Disk management:** act_runner cleans per-run subdirectories on completion. Orphaned directories from interrupted runs accumulate under `/srv/gitea-workspace` and should be pruned manually if disk space becomes a concern:
+
+```bash
+# List workspace directories older than 7 days
+find /srv/gitea-workspace -mindepth 3 -maxdepth 3 -type d -mtime +7
+```
+
+---
+
 ### Running host-level commands from CI (nsenter pattern)

 Job containers are unprivileged and do not share the host's PID/mount/network namespaces. Commands like `systemctl` that target the host daemon are therefore unavailable by default. When a workflow step needs to manage a host service (e.g. `systemctl reload caddy`), it uses the Docker socket to spin up a **privileged sibling container** in the host PID namespace:
@@ -108,6 +141,33 @@ nsenter: failed to execute /bin/systemctl: No such file or directory

 The first error means the Docker socket is not mounted into the job container — check `valid_volumes` in `/root/docker/gitea/runner-config.yaml` on the VPS. The second means the Alpine image is running but cannot enter the host mount namespace; verify `--privileged` and `--pid=host` are both present in the workflow step.

+**Failure mode 4 — workspace bind-mount not configured (observability stack or any compose-with-file-mounts job)**
+
+Symptom in CI log:
+```
+Error response from daemon: error while creating mount source path "…/prometheus/prometheus.yml": mkdir …: not a directory
+```
+
+Or the service starts but immediately crashes because a config file was mounted as an empty directory.
+
+Cause: `/srv/gitea-workspace` does not exist on the host, or the runner container's `compose.yaml` is missing the `- /srv/gitea-workspace:/srv/gitea-workspace` volume line.
+
+Diagnosis:
+```bash
+ssh root@<vps>
+ls -ld /srv/gitea-workspace          # must exist and be a directory
+# Print every mount of the runner container as "source -> destination";
+# the list must include /srv/gitea-workspace. (Grepping the raw JSON with
+# -A5 can cut off before the relevant entry when several mounts exist.)
+docker inspect -f '{{range .Mounts}}{{println .Source "->" .Destination}}{{end}}' gitea-runner
+```
+
+Recovery:
+```bash
+mkdir -p /srv/gitea-workspace
+# Add volume line to runner compose.yaml, then:
+docker compose -f ~/docker/gitea/compose.yaml up -d gitea-runner
+```
+
+See `docs/DEPLOYMENT.md §3.1` and ADR-015 for the full setup rationale.
+
 ---

 ## Gitea vs GitHub Actions Differences
--- a/infra/caddy/Caddyfile
+++ b/infra/caddy/Caddyfile
@@ -88,3 +88,13 @@ git.raddatz.cloud {
 	import security_headers
 	reverse_proxy 127.0.0.1:3005
 }
+
+grafana.archiv.raddatz.cloud {
+	import security_headers
+	reverse_proxy 127.0.0.1:3003
+}
+
+glitchtip.archiv.raddatz.cloud {
+	import security_headers
+	reverse_proxy 127.0.0.1:3002
+}
--- a/runner-config.yaml
+++ b/runner-config.yaml
@@ -2,15 +2,25 @@
 container:
  # passed as DOCKER_HOST inside the job container
  docker_host: "unix:///var/run/docker.sock"
-  # whitelists the socket path so workflows can mount it
+  # Job workspaces are stored here on the host and mounted at the same
+  # absolute path inside job containers. Identical host ↔ container path
+  # is the requirement: Docker Compose resolves relative bind mounts to
+  # $(pwd) inside the job container and passes that absolute path to the
+  # host daemon — the daemon must find the file at that exact host path.
+  # Prerequisite: mkdir -p /srv/gitea-workspace on the host, and add
+  #   - /srv/gitea-workspace:/srv/gitea-workspace
+  # to the runner service volumes in gitea's compose.yaml.
+  workdir_parent: /srv/gitea-workspace
+  # whitelists volumes that workflow steps may bind-mount
  valid_volumes:
    - "/var/run/docker.sock"
+    - "/srv/gitea-workspace"
  # appended to `docker run` when the runner spawns a job container
  # SECURITY: Mounting the Docker socket grants job containers root-equivalent
  # access to the host Docker daemon. Acceptable here because only trusted code
  # from this private repo runs on this runner. Do NOT use on a runner that
  # accepts untrusted PRs from external contributors.
-  options: "-v /var/run/docker.sock:/var/run/docker.sock"
+  options: "-v /var/run/docker.sock:/var/run/docker.sock -v /srv/gitea-workspace:/srv/gitea-workspace"
  # keep network mode default (bridge) — Testcontainers handles its own networking
  force_pull: false