From 1fc47888d50128f8d6020305c6fe3c997d2a36c0 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 15 May 2026 19:02:53 +0200 Subject: [PATCH 1/5] fix(ci): sync observability configs to host before docker compose up (#598) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DooD runner only shares /var/run/docker.sock — no workspace directory is mapped to the host daemon. Relative bind mounts in docker-compose.observability.yml resolved to paths that didn't exist on the host; Docker auto-created directories in their place, causing 'not a directory' mount failures for all five config files. Fix: - docker-compose.observability.yml: replace hardcoded ./infra/observability/ prefix with ${OBS_CONFIG_DIR:-./infra/observability} so the path is configurable while remaining backwards-compatible for local use. - nightly.yml / release.yml: add a 'Sync observability configs to host' step that finds the job container's overlay2 MergedDir (the container's full filesystem as seen from the host mount namespace), then uses the existing nsenter/alpine pattern to cp the config tree into a stable host path (/srv/familienarchiv-{staging,production}/obs-configs). OBS_CONFIG_DIR is injected into the env file so Compose picks it up. 
Co-Authored-By: Claude Sonnet 4.6 --- .gitea/workflows/nightly.yml | 19 +++++++++++++++++++ .gitea/workflows/release.yml | 11 +++++++++++ docker-compose.observability.yml | 10 +++++----- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/.gitea/workflows/nightly.yml b/.gitea/workflows/nightly.yml index 81cf885c..af5e1750 100644 --- a/.gitea/workflows/nightly.yml +++ b/.gitea/workflows/nightly.yml @@ -85,6 +85,7 @@ jobs: GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }} GLITCHTIP_DOMAIN=https://glitchtip.archiv.raddatz.cloud SENTRY_DSN=${{ secrets.SENTRY_DSN }} + OBS_CONFIG_DIR=/srv/familienarchiv-staging/obs-configs EOF - name: Verify backend /import:ro mount is wired @@ -131,6 +132,24 @@ jobs: --profile staging \ up -d --wait --remove-orphans + - name: Sync observability configs to host + # DooD: runner-config.yaml only shares /var/run/docker.sock with the host + # daemon — no workspace directory is mapped. Relative bind mounts in + # docker-compose.observability.yml would resolve to paths that don't + # exist on the host; Docker auto-creates directories in their place, + # causing "not a directory" mount failures at container startup. + # + # Fix: find the job container's overlay2 merged directory (visible in the + # host's mount namespace), then use nsenter to copy from there into a + # stable host path. The overlay path is the job container's full + # filesystem as seen from the host — no socket tricks needed. 
+ run: | + OVERLAY=$(docker inspect "$(hostname)" --format '{{.GraphDriver.Data.MergedDir}}') + SRC="${OVERLAY}$(pwd)/infra/observability" + docker run --rm --privileged --pid=host \ + alpine:3.21@sha256:48b0309ca019d89d40f670aa1bc06e426dc0931948452e8491e3d65087abc07d \ + sh -c "nsenter -t 1 -m -- sh -c 'mkdir -p /srv/familienarchiv-staging/obs-configs && cp -r \"${SRC}/.\" /srv/familienarchiv-staging/obs-configs/'" + - name: Start observability stack run: | docker compose \ diff --git a/.gitea/workflows/release.yml b/.gitea/workflows/release.yml index 2645dc15..4237c9cf 100644 --- a/.gitea/workflows/release.yml +++ b/.gitea/workflows/release.yml @@ -83,6 +83,7 @@ jobs: GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }} GLITCHTIP_DOMAIN=https://glitchtip.archiv.raddatz.cloud SENTRY_DSN=${{ secrets.SENTRY_DSN }} + OBS_CONFIG_DIR=/srv/familienarchiv-production/obs-configs EOF - name: Build images @@ -104,6 +105,16 @@ jobs: --env-file .env.production \ up -d --wait --remove-orphans + - name: Sync observability configs to host + # DooD: same overlay2 trick as nightly.yml — see that file for the + # full rationale. Production path: /srv/familienarchiv-production/obs-configs. 
+ run: | + OVERLAY=$(docker inspect "$(hostname)" --format '{{.GraphDriver.Data.MergedDir}}') + SRC="${OVERLAY}$(pwd)/infra/observability" + docker run --rm --privileged --pid=host \ + alpine:3.21@sha256:48b0309ca019d89d40f670aa1bc06e426dc0931948452e8491e3d65087abc07d \ + sh -c "nsenter -t 1 -m -- sh -c 'mkdir -p /srv/familienarchiv-production/obs-configs && cp -r \"${SRC}/.\" /srv/familienarchiv-production/obs-configs/'" + - name: Start observability stack run: | docker compose \ diff --git a/docker-compose.observability.yml b/docker-compose.observability.yml index b83cb439..2da53c3b 100644 --- a/docker-compose.observability.yml +++ b/docker-compose.observability.yml @@ -16,7 +16,7 @@ services: container_name: obs-prometheus restart: unless-stopped volumes: - - ./infra/observability/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - ${OBS_CONFIG_DIR:-./infra/observability}/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro - prometheus_data:/prometheus command: - '--config.file=/etc/prometheus/prometheus.yml' @@ -79,7 +79,7 @@ services: container_name: obs-loki restart: unless-stopped volumes: - - ./infra/observability/loki/loki-config.yml:/etc/loki/loki-config.yml:ro + - ${OBS_CONFIG_DIR:-./infra/observability}/loki/loki-config.yml:/etc/loki/loki-config.yml:ro - loki_data:/loki command: -config.file=/etc/loki/loki-config.yml expose: @@ -98,7 +98,7 @@ services: container_name: obs-promtail restart: unless-stopped volumes: - - ./infra/observability/promtail/promtail-config.yml:/etc/promtail/promtail-config.yml:ro + - ${OBS_CONFIG_DIR:-./infra/observability}/promtail/promtail-config.yml:/etc/promtail/promtail-config.yml:ro - /var/lib/docker/containers:/var/lib/docker/containers:ro # :ro restricts file-system access but NOT Docker API permissions — a compromised Promtail has full daemon access. Accepted risk on single-operator self-hosted archive. 
- /var/run/docker.sock:/var/run/docker.sock:ro @@ -118,7 +118,7 @@ services: container_name: obs-tempo restart: unless-stopped volumes: - - ./infra/observability/tempo/tempo.yml:/etc/tempo.yml:ro + - ${OBS_CONFIG_DIR:-./infra/observability}/tempo/tempo.yml:/etc/tempo.yml:ro - tempo_data:/var/tempo command: -config.file=/etc/tempo.yml expose: @@ -148,7 +148,7 @@ services: GF_USERS_ALLOW_SIGN_UP: "false" volumes: - grafana_data:/var/lib/grafana - - ./infra/observability/grafana/provisioning:/etc/grafana/provisioning:ro + - ${OBS_CONFIG_DIR:-./infra/observability}/grafana/provisioning:/etc/grafana/provisioning:ro healthcheck: test: ["CMD-SHELL", "wget -qO- http://localhost:3000/api/health | grep -q ok || exit 1"] interval: 30s -- 2.49.1 From 2cc8b1174ba80293945a4f7fd36792241ea38a4f Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 15 May 2026 19:33:36 +0200 Subject: [PATCH 2/5] fix(ci): configure workspace bind mount for DooD bind-mount resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set workdir_parent to /volume1/gitea-workspace so act_runner stores job workspaces at a real NAS path. Mounting that path at the same absolute location in job containers means $(pwd) inside any job container resolves to a host path the daemon can find — no overlay2 tricks needed. Prerequisite (NAS): mkdir -p /volume1/gitea-workspace and add - /volume1/gitea-workspace:/volume1/gitea-workspace to the runner service volumes in gitea's docker-compose.yml, then restart the runner. 
Co-Authored-By: Claude Sonnet 4.6 --- runner-config.yaml | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/runner-config.yaml b/runner-config.yaml index 66bb616c..68c4dc7a 100644 --- a/runner-config.yaml +++ b/runner-config.yaml @@ -2,15 +2,25 @@ container: # passed as DOCKER_HOST inside the job container docker_host: "unix:///var/run/docker.sock" - # whitelists the socket path so workflows can mount it + # Job workspaces are stored here on the NAS and mounted at the same + # absolute path inside job containers. Identical host ↔ container path + # is the requirement: Docker Compose resolves relative bind mounts to + # $(pwd) inside the job container and passes that absolute path to the + # host daemon — the daemon must find the file at that exact host path. + # Prerequisite: mkdir -p /volume1/gitea-workspace on the NAS, and add + # - /volume1/gitea-workspace:/volume1/gitea-workspace + # to the runner service volumes in gitea's docker-compose.yml. + workdir_parent: /volume1/gitea-workspace + # whitelists volumes that workflow steps may bind-mount valid_volumes: - "/var/run/docker.sock" + - "/volume1/gitea-workspace" # appended to `docker run` when the runner spawns a job container # SECURITY: Mounting the Docker socket grants job containers root-equivalent # access to the host Docker daemon. Acceptable here because only trusted code # from this private repo runs on this runner. Do NOT use on a runner that # accepts untrusted PRs from external contributors. 
- options: "-v /var/run/docker.sock:/var/run/docker.sock" + options: "-v /var/run/docker.sock:/var/run/docker.sock -v /volume1/gitea-workspace:/volume1/gitea-workspace" # keep network mode default (bridge) — Testcontainers handles its own networking force_pull: false -- 2.49.1 From 56c3e51657752973863481432564b4bfd65847b7 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 15 May 2026 19:36:55 +0200 Subject: [PATCH 3/5] fix(ci): replace overlay2 sync with workspace bind mount for DooD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit runner-config.yaml: correct path to /srv/gitea-workspace (VPS, not Synology). docker-compose.observability.yml: revert 5 bind mounts to plain relative paths; OBS_CONFIG_DIR variable is no longer needed. nightly.yml / release.yml: remove OBS_CONFIG_DIR env injection and the "Sync observability configs to host" step from both workflows. With workdir_parent=/srv/gitea-workspace and an identical host<->container bind mount, $(pwd) inside job containers resolves to a real host path the daemon can find — no privileged container, no overlay2 inspection, no nsenter. 
Co-Authored-By: Claude Sonnet 4.6 --- .gitea/workflows/nightly.yml | 19 ------------------- .gitea/workflows/release.yml | 11 ----------- docker-compose.observability.yml | 10 +++++----- runner-config.yaml | 12 ++++++------ 4 files changed, 11 insertions(+), 41 deletions(-) diff --git a/.gitea/workflows/nightly.yml b/.gitea/workflows/nightly.yml index af5e1750..81cf885c 100644 --- a/.gitea/workflows/nightly.yml +++ b/.gitea/workflows/nightly.yml @@ -85,7 +85,6 @@ jobs: GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }} GLITCHTIP_DOMAIN=https://glitchtip.archiv.raddatz.cloud SENTRY_DSN=${{ secrets.SENTRY_DSN }} - OBS_CONFIG_DIR=/srv/familienarchiv-staging/obs-configs EOF - name: Verify backend /import:ro mount is wired @@ -132,24 +131,6 @@ jobs: --profile staging \ up -d --wait --remove-orphans - - name: Sync observability configs to host - # DooD: runner-config.yaml only shares /var/run/docker.sock with the host - # daemon — no workspace directory is mapped. Relative bind mounts in - # docker-compose.observability.yml would resolve to paths that don't - # exist on the host; Docker auto-creates directories in their place, - # causing "not a directory" mount failures at container startup. - # - # Fix: find the job container's overlay2 merged directory (visible in the - # host's mount namespace), then use nsenter to copy from there into a - # stable host path. The overlay path is the job container's full - # filesystem as seen from the host — no socket tricks needed. 
- run: | - OVERLAY=$(docker inspect "$(hostname)" --format '{{.GraphDriver.Data.MergedDir}}') - SRC="${OVERLAY}$(pwd)/infra/observability" - docker run --rm --privileged --pid=host \ - alpine:3.21@sha256:48b0309ca019d89d40f670aa1bc06e426dc0931948452e8491e3d65087abc07d \ - sh -c "nsenter -t 1 -m -- sh -c 'mkdir -p /srv/familienarchiv-staging/obs-configs && cp -r \"${SRC}/.\" /srv/familienarchiv-staging/obs-configs/'" - - name: Start observability stack run: | docker compose \ diff --git a/.gitea/workflows/release.yml b/.gitea/workflows/release.yml index 4237c9cf..2645dc15 100644 --- a/.gitea/workflows/release.yml +++ b/.gitea/workflows/release.yml @@ -83,7 +83,6 @@ jobs: GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }} GLITCHTIP_DOMAIN=https://glitchtip.archiv.raddatz.cloud SENTRY_DSN=${{ secrets.SENTRY_DSN }} - OBS_CONFIG_DIR=/srv/familienarchiv-production/obs-configs EOF - name: Build images @@ -105,16 +104,6 @@ jobs: --env-file .env.production \ up -d --wait --remove-orphans - - name: Sync observability configs to host - # DooD: same overlay2 trick as nightly.yml — see that file for the - # full rationale. Production path: /srv/familienarchiv-production/obs-configs. 
- run: | - OVERLAY=$(docker inspect "$(hostname)" --format '{{.GraphDriver.Data.MergedDir}}') - SRC="${OVERLAY}$(pwd)/infra/observability" - docker run --rm --privileged --pid=host \ - alpine:3.21@sha256:48b0309ca019d89d40f670aa1bc06e426dc0931948452e8491e3d65087abc07d \ - sh -c "nsenter -t 1 -m -- sh -c 'mkdir -p /srv/familienarchiv-production/obs-configs && cp -r \"${SRC}/.\" /srv/familienarchiv-production/obs-configs/'" - - name: Start observability stack run: | docker compose \ diff --git a/docker-compose.observability.yml b/docker-compose.observability.yml index 2da53c3b..b83cb439 100644 --- a/docker-compose.observability.yml +++ b/docker-compose.observability.yml @@ -16,7 +16,7 @@ services: container_name: obs-prometheus restart: unless-stopped volumes: - - ${OBS_CONFIG_DIR:-./infra/observability}/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - ./infra/observability/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro - prometheus_data:/prometheus command: - '--config.file=/etc/prometheus/prometheus.yml' @@ -79,7 +79,7 @@ services: container_name: obs-loki restart: unless-stopped volumes: - - ${OBS_CONFIG_DIR:-./infra/observability}/loki/loki-config.yml:/etc/loki/loki-config.yml:ro + - ./infra/observability/loki/loki-config.yml:/etc/loki/loki-config.yml:ro - loki_data:/loki command: -config.file=/etc/loki/loki-config.yml expose: @@ -98,7 +98,7 @@ services: container_name: obs-promtail restart: unless-stopped volumes: - - ${OBS_CONFIG_DIR:-./infra/observability}/promtail/promtail-config.yml:/etc/promtail/promtail-config.yml:ro + - ./infra/observability/promtail/promtail-config.yml:/etc/promtail/promtail-config.yml:ro - /var/lib/docker/containers:/var/lib/docker/containers:ro # :ro restricts file-system access but NOT Docker API permissions — a compromised Promtail has full daemon access. Accepted risk on single-operator self-hosted archive. 
- /var/run/docker.sock:/var/run/docker.sock:ro @@ -118,7 +118,7 @@ services: container_name: obs-tempo restart: unless-stopped volumes: - - ${OBS_CONFIG_DIR:-./infra/observability}/tempo/tempo.yml:/etc/tempo.yml:ro + - ./infra/observability/tempo/tempo.yml:/etc/tempo.yml:ro - tempo_data:/var/tempo command: -config.file=/etc/tempo.yml expose: @@ -148,7 +148,7 @@ services: GF_USERS_ALLOW_SIGN_UP: "false" volumes: - grafana_data:/var/lib/grafana - - ${OBS_CONFIG_DIR:-./infra/observability}/grafana/provisioning:/etc/grafana/provisioning:ro + - ./infra/observability/grafana/provisioning:/etc/grafana/provisioning:ro healthcheck: test: ["CMD-SHELL", "wget -qO- http://localhost:3000/api/health | grep -q ok || exit 1"] interval: 30s diff --git a/runner-config.yaml b/runner-config.yaml index 68c4dc7a..23bef458 100644 --- a/runner-config.yaml +++ b/runner-config.yaml @@ -7,20 +7,20 @@ container: # is the requirement: Docker Compose resolves relative bind mounts to # $(pwd) inside the job container and passes that absolute path to the # host daemon — the daemon must find the file at that exact host path. - # Prerequisite: mkdir -p /volume1/gitea-workspace on the NAS, and add - # - /volume1/gitea-workspace:/volume1/gitea-workspace - # to the runner service volumes in gitea's docker-compose.yml. - workdir_parent: /volume1/gitea-workspace + # Prerequisite: mkdir -p /srv/gitea-workspace on the host, and add + # - /srv/gitea-workspace:/srv/gitea-workspace + # to the runner service volumes in gitea's compose.yaml. + workdir_parent: /srv/gitea-workspace # whitelists volumes that workflow steps may bind-mount valid_volumes: - "/var/run/docker.sock" - - "/volume1/gitea-workspace" + - "/srv/gitea-workspace" # appended to `docker run` when the runner spawns a job container # SECURITY: Mounting the Docker socket grants job containers root-equivalent # access to the host Docker daemon. Acceptable here because only trusted code # from this private repo runs on this runner. 
Do NOT use on a runner that # accepts untrusted PRs from external contributors. - options: "-v /var/run/docker.sock:/var/run/docker.sock -v /volume1/gitea-workspace:/volume1/gitea-workspace" + options: "-v /var/run/docker.sock:/var/run/docker.sock -v /srv/gitea-workspace:/srv/gitea-workspace" # keep network mode default (bridge) — Testcontainers handles its own networking force_pull: false -- 2.49.1 From 15ef079eff7511a31ff1ca79836a9de90619847e Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 15 May 2026 19:38:18 +0200 Subject: [PATCH 4/5] docs(adr): add ADR-015 for DooD workspace bind-mount approach Documents the decision to use workdir_parent + identical host<->container path instead of the overlay2 MergedDir sync that was in the initial fix. Captures the alternatives (nsenter sync, image-baked configs, path mismatch) and the operational consequences (prereq directory, out-of-band compose.yaml). Co-Authored-By: Claude Sonnet 4.6 --- docs/adr/015-dood-workspace-bind-mount.md | 69 +++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 docs/adr/015-dood-workspace-bind-mount.md diff --git a/docs/adr/015-dood-workspace-bind-mount.md b/docs/adr/015-dood-workspace-bind-mount.md new file mode 100644 index 00000000..53bf73d1 --- /dev/null +++ b/docs/adr/015-dood-workspace-bind-mount.md @@ -0,0 +1,69 @@ +# ADR-015: DooD workspace bind mount for Compose file bind-mount resolution + +## Status + +Accepted + +## Context + +The deploy workflows (`.gitea/workflows/nightly.yml`, `release.yml`) run job steps inside Docker containers via Docker-out-of-Docker (DooD): the Gitea runner mounts the host Docker socket, and act_runner spawns sibling containers for each job. + +When a job step calls `docker compose -f docker-compose.observability.yml up`, Docker Compose resolves relative bind-mount sources against `$(pwd)` inside the job container and passes the resulting absolute paths to the **host** daemon. 
For example, `./infra/observability/prometheus/prometheus.yml` becomes `/some/path/infra/observability/prometheus/prometheus.yml`, and the host daemon tries to bind-mount that path from the **host filesystem**. + +In the default DooD setup (`runner-config.yaml` with only `valid_volumes: ["/var/run/docker.sock"]`), job container workspaces live in the act_runner overlay2 layer. The host has no corresponding directory at the job container's `$(pwd)` path, so the daemon auto-creates an empty directory in its place. The container then fails to start because the mount target was expected to be a file, not a directory: + +``` +error mounting "…/prometheus/prometheus.yml" to rootfs at "/etc/prometheus/prometheus.yml": not a directory +``` + +This affected all five config file bind mounts in `docker-compose.observability.yml`. + +## Decision + +Configure act_runner to store job workspaces on a real host path (`/srv/gitea-workspace`) and mount that path into both the runner container and every job container at the **same absolute path**. The identity of the host path and container path is the key constraint: Compose resolves to an absolute path and hands it to the host daemon, which looks for that exact path on the host filesystem. + +**runner-config.yaml changes:** + +```yaml +container: + workdir_parent: /srv/gitea-workspace + valid_volumes: + - "/var/run/docker.sock" + - "/srv/gitea-workspace" + options: "-v /var/run/docker.sock:/var/run/docker.sock -v /srv/gitea-workspace:/srv/gitea-workspace" +``` + +**Runner compose.yaml change** (host side — not in this repo): + +```yaml +runner: + volumes: + - /srv/gitea-workspace:/srv/gitea-workspace +``` + +With this in place, `$(pwd)` inside a job container resolves to a workspace directory under `/srv/gitea-workspace/`, which is a real directory on the host. Compose-managed bind mounts from that directory work without any additional steps. 
+ +## Alternatives Considered + +| Alternative | Why rejected | +|---|---| +| **overlay2 `MergedDir` sync via privileged nsenter** (the previous approach, see PR #599 v1) | Required `--privileged --pid=host` (effective root on the host) plus fragile overlay2 driver assumption. Introduced stale-file risk on the host and a second stable path (`/srv/familienarchiv-*/obs-configs`) to maintain separately from the source tree. Replaced by this ADR. | +| **Build configs into a dedicated Docker image** (pattern used for MinIO bootstrap, see `infra/minio/Dockerfile`) | Viable for static files that change infrequently. Requires a build step and an image rebuild every time a config changes. Appropriate for bootstrap scripts; too heavy for frequently-tuned observability configs. | +| **Add workspace directory to runner-config `valid_volumes` only** (without `workdir_parent`) | `valid_volumes` whitelists paths that workflow steps may reference, but does not change where act_runner stores workspaces. Without `workdir_parent`, the workspace would still be in overlay2 and the bind-mount resolution problem would remain. | +| **Map workspace under a different host path than container path** (e.g. host `/srv/workspace`, container `/workspace`) | Compose resolves to the container-internal path (e.g. `/workspace/…`) and passes that to the host daemon. The host daemon interprets the source as a host path. If host `/workspace` does not exist, the daemon creates an empty directory — the original bug. The paths must be identical. | + +## Consequences + +- `/srv/gitea-workspace` must exist on the VPS before the runner starts. The directory was created as part of this change; it is not created automatically. +- The runner container's `compose.yaml` (maintained outside this repo at `~/docker/gitea/compose.yaml` on the VPS) must include the `- /srv/gitea-workspace:/srv/gitea-workspace` volume line. 
This is an out-of-band operational dependency; the prerequisite is documented in `runner-config.yaml`. +- `workdir_parent` applies to all jobs on this runner. Any future workflow that calls `docker compose` with relative bind mounts benefits automatically without further configuration. +- Job workspaces persist across runs under `/srv/gitea-workspace`. act_runner manages per-run subdirectory cleanup. Orphaned directories from interrupted runs should be cleaned up manually if disk space becomes a concern. +- Workflows that previously relied on `OBS_CONFIG_DIR` env var or the `obs-configs` stable path on the host no longer need those. Both were removed in this PR. +- This pattern does **not** apply to the `nsenter`-based Caddy reload step (ADR-012), which manages a host systemd service — a different problem class with no bind-mount equivalent. + +## References + +- ADR-011 — single-tenant runner trust model +- ADR-012 — nsenter via privileged container for host service management +- Issue #598 — original observability stack bind-mount failure +- `runner-config.yaml` — `workdir_parent`, `valid_volumes`, `options` -- 2.49.1 From e4ac5f08e7d1cc2758dc138edead47f8b1ddc78d Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 15 May 2026 19:46:54 +0200 Subject: [PATCH 5/5] docs(ci): document workspace bind-mount setup for DooD runners MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the /srv/gitea-workspace prerequisite step to DEPLOYMENT.md §3.1 and a new "Workspace bind-mount setup" subsection plus failure mode 4 to ci-gitea.md, covering the root cause, one-time host setup, disk management, and troubleshooting for the bind-mount resolution fix introduced in ADR-015. 
Co-Authored-By: Claude Sonnet 4.6 --- docs/DEPLOYMENT.md | 9 +++++ docs/infrastructure/ci-gitea.md | 60 +++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index b6845cf3..b906d66f 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -193,6 +193,15 @@ curl -fsSL https://tailscale.com/install.sh | sh && tailscale up # files to disk during execution (cleaned up unconditionally on completion). # A multi-tenant runner would need to switch to stdin-piped env files. # (See https://docs.gitea.com/usage/actions/quickstart for the register step.) + +# Runner workspace directory — required for DooD bind-mount resolution (ADR-015). +# act_runner stores job workspaces here so that docker compose bind mounts resolve +# to real host paths. The path must be identical on the host and inside job containers. +mkdir -p /srv/gitea-workspace +# Also add this volume line to the runner service in ~/docker/gitea/compose.yaml: +# volumes: +# - /srv/gitea-workspace:/srv/gitea-workspace +# See runner-config.yaml (workdir_parent + valid_volumes + options) and ADR-015. ``` ### 3.2 DNS records diff --git a/docs/infrastructure/ci-gitea.md b/docs/infrastructure/ci-gitea.md index b57a719e..8d92890b 100644 --- a/docs/infrastructure/ci-gitea.md +++ b/docs/infrastructure/ci-gitea.md @@ -19,6 +19,39 @@ Both containers live in the `gitea_gitea` Docker network on the VPS. The runner The `gitea-runner` container mounts the host Docker socket (`/var/run/docker.sock`). When a workflow job runs, act_runner spawns a **sibling container** for each job. That job container also gets the Docker socket mounted (via `valid_volumes` in `runner-config.yaml`), enabling `docker compose` calls in workflow steps. +### Workspace bind-mount setup (DooD path resolution) + +When a workflow step calls `docker compose up` with relative bind-mount sources (e.g. 
`./infra/observability/prometheus/prometheus.yml`), Compose resolves them against `$(pwd)` inside the job container and passes the resulting **absolute path** to the host Docker daemon. The host daemon then tries to bind-mount that path from the **host filesystem**. + +In the default DooD setup the job container's workspace lives in the act_runner overlay2 layer — the host has no directory at that path, auto-creates an empty one, and the container fails with: + +``` +error mounting "…/prometheus/prometheus.yml" to rootfs at "/etc/prometheus/prometheus.yml": not a directory +``` + +**Solution (ADR-015):** store job workspaces on a real host path and mount it at the **same absolute path** inside the runner and every job container. `runner-config.yaml` configures this via `workdir_parent`, `valid_volumes`, and `options`. + +**One-time host setup** (required on any fresh VPS): + +```bash +mkdir -p /srv/gitea-workspace +# Then add to the runner service in ~/docker/gitea/compose.yaml: +# volumes: +# - /srv/gitea-workspace:/srv/gitea-workspace +# Restart the runner container for the change to take effect. +``` + +The path `/srv/gitea-workspace` is the canonical workspace root. It must be identical on the host and inside job containers — if the paths differ, Compose still resolves to the container-internal path, which the host daemon cannot find (the original bug). + +**Disk management:** act_runner cleans per-run subdirectories on completion. Orphaned directories from interrupted runs accumulate under `/srv/gitea-workspace` and should be pruned manually if disk space becomes a concern: + +```bash +# List workspace directories older than 7 days +find /srv/gitea-workspace -mindepth 3 -maxdepth 3 -type d -mtime +7 +``` + +--- + ### Running host-level commands from CI (nsenter pattern) Job containers are unprivileged and do not share the host's PID/mount/network namespaces. Commands like `systemctl` that target the host daemon are therefore unavailable by default. 
When a workflow step needs to manage a host service (e.g. `systemctl reload caddy`), it uses the Docker socket to spin up a **privileged sibling container** in the host PID namespace: @@ -108,6 +141,33 @@ nsenter: failed to execute /bin/systemctl: No such file or directory The first error means the Docker socket is not mounted into the job container — check `valid_volumes` in `/root/docker/gitea/runner-config.yaml` on the VPS. The second means the Alpine image is running but cannot enter the host mount namespace; verify `--privileged` and `--pid=host` are both present in the workflow step. +**Failure mode 4 — workspace bind-mount not configured (observability stack or any compose-with-file-mounts job)** + +Symptom in CI log: +``` +Error response from daemon: error while creating mount source path "…/prometheus/prometheus.yml": mkdir …: not a directory +``` + +Or the service starts but immediately crashes because a config file was mounted as an empty directory. + +Cause: `/srv/gitea-workspace` does not exist on the host, or the runner container's `compose.yaml` is missing the `- /srv/gitea-workspace:/srv/gitea-workspace` volume line. + +Diagnosis: +```bash +ssh root@<vps-host> +ls -la /srv/gitea-workspace # must exist and be a directory +docker inspect gitea-runner | grep -A5 Mounts # must show /srv/gitea-workspace +``` + +Recovery: +```bash +mkdir -p /srv/gitea-workspace +# Add volume line to runner compose.yaml, then: +docker compose -f ~/docker/gitea/compose.yaml up -d gitea-runner +``` + +See `docs/DEPLOYMENT.md §3.1` and ADR-015 for the full setup rationale. + --- ## Gitea vs GitHub Actions Differences -- 2.49.1