From 4a537d6b19e3b99e5c61aa21b3df3685592dfe97 Mon Sep 17 00:00:00 2001 From: Marcel Date: Mon, 11 May 2026 18:57:47 +0200 Subject: [PATCH 1/7] feat(infra): bind-mount /import for backend mass-import endpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `MassImportService` reads the ODS spreadsheet and referenced PDFs from a hardcoded `/import` path inside the backend container. Dev compose already bind-mounts `./import:/import`, but the prod compose had no equivalent, so `POST /api/admin/trigger-import` would always fail on staging/prod with "no spreadsheet found". Mount strategy: - Source path is env-driven (`IMPORT_HOST_DIR`), defaulting to `/srv/familienarchiv/import` so the host path is stable across CI deploys (the compose working dir is recreated each run, so `./import` would not persist). - Read-only — `MassImportService` only reads (`Files.list` / `Files.walk`), never writes. Read-only mount makes that contract explicit and prevents the backend container from mutating the source PDFs. - Empty / missing path is harmless: the import API just returns the existing "no spreadsheet found" error rather than crashing the container. To use on staging: rsync the import folder to `/srv/familienarchiv-staging/import/` on the host, set `IMPORT_HOST_DIR=/srv/familienarchiv-staging/import` in `.env.staging`, redeploy, trigger import from `/admin/system`. Co-Authored-By: Claude Opus 4.7 --- docker-compose.prod.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index e56ac956..b4044014 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -26,6 +26,14 @@ # MAIL_HOST, MAIL_PORT, SMTP relay (production only; staging uses mailpit) # MAIL_USERNAME, MAIL_PASSWORD # APP_MAIL_FROM sender address (e.g. noreply@raddatz.cloud) +# +# Optional env vars: +# IMPORT_HOST_DIR absolute host path holding the ODS spreadsheet +# and PDFs for /admin/system mass import. 
Mounted +# read-only at /import inside the backend. +# Defaults to /srv/familienarchiv/import. When the +# path is empty the import API simply reports +# "no spreadsheet found" — no crash. networks: archiv-net: @@ -173,6 +181,12 @@ services: # Bound to localhost only — Caddy fronts external traffic. ports: - "127.0.0.1:${PORT_BACKEND}:8080" + # Host path holding the ODS spreadsheet + PDFs for the mass-import endpoint. + # Read-only; MassImportService only reads (Files.list / Files.walk on /import). + # Outside the compose working dir on purpose — that dir is recreated per CI + # deploy. See IMPORT_HOST_DIR in the header for the env-var contract. + volumes: + - ${IMPORT_HOST_DIR:-/srv/familienarchiv/import}:/import:ro environment: SPRING_DATASOURCE_URL: jdbc:postgresql://db:5432/archiv SPRING_DATASOURCE_USERNAME: archiv -- 2.49.1 From ff20721dee23ac59aa32b953eee7f1735ace0886 Mon Sep 17 00:00:00 2001 From: Marcel Date: Mon, 11 May 2026 20:02:45 +0200 Subject: [PATCH 2/7] refactor(import): make import directory @Value-configurable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hardcoded `static final String IMPORT_DIR = "/import"` was the only non-`@Value` configurable input in MassImportService — every column index next to it is wired through `app.import.col.*`. Lifts the contract from infrastructure (compose bind mount) into application config (`app.import.dir`), with `/import` as the default so the existing bind-mount path keeps working. Addresses review feedback from Markus and Felix on #526. 
Co-Authored-By: Claude Opus 4.7 --- .../importing/MassImportService.java | 10 ++++++---- .../importing/MassImportServiceTest.java | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java index e35d05cc..f58233ff 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java @@ -99,7 +99,9 @@ public class MassImportService { @Value("${app.import.col.transcription:13}") private int colTranscription; - private static final String IMPORT_DIR = "/import"; + @Value("${app.import.dir:/import}") + private String importDir; + private static final DateTimeFormatter GERMAN_DATE = DateTimeFormatter.ofPattern("d. MMMM yyyy", Locale.GERMAN); // ODS XML namespaces @@ -129,7 +131,7 @@ public class MassImportService { } private File findSpreadsheetFile() throws IOException { - try (Stream<Path> files = Files.list(Paths.get(IMPORT_DIR))) { + try (Stream<Path> files = Files.list(Paths.get(importDir))) { return files .filter(p -> { String name = p.toString().toLowerCase(); @@ -137,7 +139,7 @@ public class MassImportService { }) .findFirst() .orElseThrow(() -> new RuntimeException( - "Keine Tabellendatei (.ods/.xlsx/.xls) in " + IMPORT_DIR + " gefunden!")) + "Keine Tabellendatei (.ods/.xlsx/.xls) in " + importDir + " gefunden!")) .toFile(); } } @@ -378,7 +380,7 @@ public class MassImportService { } private Optional<File> findFileRecursive(String filename) { - try (Stream<Path> walk = Files.walk(Paths.get(IMPORT_DIR))) { + try (Stream<Path> walk = Files.walk(Paths.get(importDir))) { return walk.filter(p -> !Files.isDirectory(p)) .filter(p -> p.getFileName().toString().equals(filename)) .map(Path::toFile) diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java 
b/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java index 7470f651..a5fe50c6 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java @@ -50,6 +50,7 @@ class MassImportServiceTest { void setUp() { service = new MassImportService(documentService, personService, tagService, s3Client, thumbnailAsyncRunner); ReflectionTestUtils.setField(service, "bucketName", "test-bucket"); + ReflectionTestUtils.setField(service, "importDir", "/import"); ReflectionTestUtils.setField(service, "colIndex", 0); ReflectionTestUtils.setField(service, "colBox", 1); ReflectionTestUtils.setField(service, "colFolder", 2); @@ -79,6 +80,19 @@ class MassImportServiceTest { assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.FAILED); } + @Test + void runImportAsync_readsFromConfiguredImportDir(@TempDir Path tempDir) { + // Empty temp dir → findSpreadsheetFile throws "no spreadsheet" with the + // configured path in the message. Proves the field, not a constant, + // drives the lookup. 
+ ReflectionTestUtils.setField(service, "importDir", tempDir.toString()); + + service.runImportAsync(); + + assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.FAILED); + assertThat(service.getStatus().message()).contains(tempDir.toString()); + } + @Test void runImportAsync_throwsConflict_whenAlreadyRunning() { MassImportService.ImportStatus running = new MassImportService.ImportStatus( -- 2.49.1 From cdb5db6c68c33653f2d908c1212db5eaeddf35cc Mon Sep 17 00:00:00 2001 From: Marcel Date: Mon, 11 May 2026 20:03:57 +0200 Subject: [PATCH 3/7] fix(compose): require IMPORT_HOST_DIR, no default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tobias and Markus both flagged that a shared default (/srv/familienarchiv/import) invites silent collision when staging and prod cohabit one host. Switch to ${IMPORT_HOST_DIR:?...} so compose refuses to start without an explicit per-env path — an accidental collision through a shared default is no longer possible (two env files can still be pointed at the same path deliberately). The error message points operators at docs/DEPLOYMENT.md so the recovery step is one click away. IMPORT_HOST_DIR moves from "Optional" to the main required-env-vars block in the header. Addresses review feedback from Markus, Tobias, and Nora on #526. Co-Authored-By: Claude Opus 4.7 --- docker-compose.prod.yml | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index b4044014..e8687d45 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -26,14 +26,15 @@ # MAIL_HOST, MAIL_PORT, SMTP relay (production only; staging uses mailpit) # MAIL_USERNAME, MAIL_PASSWORD # APP_MAIL_FROM sender address (e.g. noreply@raddatz.cloud) -# -# Optional env vars: -# IMPORT_HOST_DIR absolute host path holding the ODS spreadsheet -# and PDFs for /admin/system mass import. Mounted -# read-only at /import inside the backend. -# Defaults to /srv/familienarchiv/import. 
When the -# path is empty the import API simply reports -# "no spreadsheet found" — no crash. +# IMPORT_HOST_DIR absolute host path holding ONLY the ODS +# spreadsheet and PDFs for /admin/system mass +# import — mounted read-only at /import inside +# the backend. Compose refuses to start when +# this var is unset, so staging and prod cannot +# accidentally share an import source. Must be +# readable by the backend container's UID +# (currently root via the OpenJDK image — any +# world-readable directory works). networks: archiv-net: @@ -183,10 +184,10 @@ services: - "127.0.0.1:${PORT_BACKEND}:8080" # Host path holding the ODS spreadsheet + PDFs for the mass-import endpoint. # Read-only; MassImportService only reads (Files.list / Files.walk on /import). - # Outside the compose working dir on purpose — that dir is recreated per CI - # deploy. See IMPORT_HOST_DIR in the header for the env-var contract. + # Required — no default — so staging and prod cannot accidentally share an + # import source. CI workflows pin this per-env (see .gitea/workflows/). volumes: - - ${IMPORT_HOST_DIR:-/srv/familienarchiv/import}:/import:ro + - ${IMPORT_HOST_DIR:?Set IMPORT_HOST_DIR to a host path holding the mass-import payload (ODS + PDFs). See docs/DEPLOYMENT.md.}:/import:ro environment: SPRING_DATASOURCE_URL: jdbc:postgresql://db:5432/archiv SPRING_DATASOURCE_USERNAME: archiv -- 2.49.1 From a40267e490b6f7d6e47c942e8ea1c778855f7923 Mon Sep 17 00:00:00 2001 From: Marcel Date: Mon, 11 May 2026 20:05:14 +0200 Subject: [PATCH 4/7] docs(deployment): document IMPORT_HOST_DIR and mass-import workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DEPLOYMENT.md line 81 declares any compose env var missing from §2 a blocking review comment. IMPORT_HOST_DIR (added on this branch) was unmentioned. 
Adds the row and rewrites §6.4 so the staging/prod operator workflow (rsync host → set env → trigger import) is in the runbook, not just buried in compose comments. Addresses review feedback from Markus and Tobias on #526. Co-Authored-By: Claude Opus 4.7 --- docs/DEPLOYMENT.md | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index bd7b7a1a..a2dc55ca 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -97,6 +97,7 @@ All vars are set in `.env` at the repo root (copy from `.env.example`). The back | `APP_BASE_URL` | Public-facing URL for email links | `http://localhost:3000` | YES (prod) | — | | `APP_OCR_BASE_URL` | Internal URL of the OCR service | — | YES | — | | `APP_OCR_TRAINING_TOKEN` | Secret token for OCR training endpoints | — | YES (prod) | YES | +| `IMPORT_HOST_DIR` | Absolute host path holding the ODS spreadsheet + PDFs for the `/admin/system` mass-import card. Mounted read-only at `/import` inside the backend (compose-only — backend reads via `app.import.dir`). Compose refuses to start when unset, so staging and prod cannot accidentally share the source. Convention: `/srv/familienarchiv-staging/import` and `/srv/familienarchiv-production/import` | — | YES (prod compose) | — | | `MAIL_HOST` | SMTP host | `mailpit` (dev) | YES (prod) | — | | `MAIL_PORT` | SMTP port | `1025` (dev) | YES (prod) | — | | `MAIL_USERNAME` | SMTP username | — | YES (prod) | YES | @@ -329,9 +330,18 @@ bash scripts/download-kraken-models.sh ### Trigger a mass import (Excel/ODS) -1. Place the import file in the `import/` bind mount on the backend container. -2. Call `POST /api/admin/trigger-import` (requires `ADMIN` permission). -3. The import runs asynchronously — poll `GET /api/admin/import-status` or watch backend logs. +**Dev:** drop the ODS spreadsheet + PDFs into `./import/` at the repo root — the dev compose bind-mounts it to `/import` automatically. + +**Staging/production:** + +1. 
Pre-stage the payload on the host. Convention: `/srv/familienarchiv-staging/import/` or `/srv/familienarchiv-production/import/`. + ```bash + rsync -avh --progress ./import/ user@host:/srv/familienarchiv-staging/import/ + ``` +2. Make sure `IMPORT_HOST_DIR=<host-path>` is set in `.env.staging` / `.env.production` (the nightly/release workflows already write this — see §3). Compose refuses to start without it. +3. Redeploy the stack so the bind mount takes effect — or, if the mount is already in place, skip to step 4. +4. Call `POST /api/admin/trigger-import` (requires `ADMIN` permission), or click the "Import starten" button on `/admin/system`. +5. The import runs asynchronously — poll `GET /api/admin/import-status`, watch `/admin/system`, or tail the backend logs. --- -- 2.49.1 From 9703a72e6c5726886779c65cdedf4a242dd3008f Mon Sep 17 00:00:00 2001 From: Marcel Date: Mon, 11 May 2026 20:05:55 +0200 Subject: [PATCH 5/7] ci(nightly): wire IMPORT_HOST_DIR=/srv/familienarchiv-staging/import MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The compose file now requires IMPORT_HOST_DIR or refuses to start (#526). Without this line the next nightly deploy would fail with a clear interpolation error, but it should not fail — the staging import payload already lives at this host path (rsync'd in #526). Addresses Tobias's review on #526. 
Co-Authored-By: Claude Opus 4.7 --- .gitea/workflows/nightly.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitea/workflows/nightly.yml b/.gitea/workflows/nightly.yml index 814e6fc8..dcf349b2 100644 --- a/.gitea/workflows/nightly.yml +++ b/.gitea/workflows/nightly.yml @@ -73,6 +73,7 @@ jobs: MAIL_SMTP_AUTH=false MAIL_STARTTLS_ENABLE=false APP_MAIL_FROM=noreply@staging.raddatz.cloud + IMPORT_HOST_DIR=/srv/familienarchiv-staging/import EOF - name: Build images -- 2.49.1 From c2c42706c7ed3f2c2111cfbee004a8c2e9e1b191 Mon Sep 17 00:00:00 2001 From: Marcel Date: Mon, 11 May 2026 20:06:33 +0200 Subject: [PATCH 6/7] ci(release): wire IMPORT_HOST_DIR=/srv/familienarchiv-production/import MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirrors the staging change. The host directory does not yet exist on the production server — first production release that consumes this will create an empty bind source via Docker's auto-create behaviour; mass import then reports "no spreadsheet found" until an operator pre-stages a payload there. Addresses Tobias's review on #526. 
Co-Authored-By: Claude Opus 4.7 --- .gitea/workflows/release.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitea/workflows/release.yml b/.gitea/workflows/release.yml index 3831320f..7d2d3618 100644 --- a/.gitea/workflows/release.yml +++ b/.gitea/workflows/release.yml @@ -71,6 +71,7 @@ jobs: MAIL_SMTP_AUTH=true MAIL_STARTTLS_ENABLE=true APP_MAIL_FROM=noreply@raddatz.cloud + IMPORT_HOST_DIR=/srv/familienarchiv-production/import EOF - name: Build images -- 2.49.1 From 3775f4cb52558bc40ac1f9d0178c0cef826974ff Mon Sep 17 00:00:00 2001 From: Marcel Date: Mon, 11 May 2026 20:08:30 +0200 Subject: [PATCH 7/7] ci(nightly): regression guard for backend /import:ro mount MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sara flagged that a future "compose cleanup" PR could silently drop the backend volumes block and CI would happily pass while mass import on staging silently broke. Adds a pre-build step that renders the staging compose config and fails the deploy if `target: /import` or `read_only: true` is missing. Local verification of the guard: - Volumes block removed → `grep -q 'target: /import'` exits 1 → step fails - Volumes block present → both greps match → step passes Addresses Sara's review on #526. Co-Authored-By: Claude Opus 4.7 --- .gitea/workflows/nightly.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/.gitea/workflows/nightly.yml b/.gitea/workflows/nightly.yml index dcf349b2..da11ebf7 100644 --- a/.gitea/workflows/nightly.yml +++ b/.gitea/workflows/nightly.yml @@ -76,6 +76,28 @@ jobs: IMPORT_HOST_DIR=/srv/familienarchiv-staging/import EOF + - name: Verify backend /import:ro mount is wired + # Regression guard for #526: the /admin/system mass-import card + # only works when the backend service mounts the host import + # payload at /import (read-only). If a future "compose cleanup" + # PR drops the volumes block, mass import silently breaks again. 
+ # `compose config` renders both shorthand and longform mounts as + # `target: /import` + `read_only: true`, so we assert against + # the rendered form rather than the raw source YAML. + run: | + set -e + docker compose \ + -f docker-compose.prod.yml \ + -p archiv-staging \ + --env-file .env.staging \ + --profile staging \ + config > /tmp/compose-rendered.yml + grep -q '^[[:space:]]*target: /import$' /tmp/compose-rendered.yml \ + || { echo "::error::backend is missing the /import bind mount (see #526)"; exit 1; } + grep -A2 '^[[:space:]]*target: /import$' /tmp/compose-rendered.yml \ + | grep -q 'read_only: true' \ + || { echo "::error::backend /import mount is not read-only (see #526)"; exit 1; } + - name: Build images # `--pull` forces re-fetching pinned base images so a CVE # re-publication of the same tag (e.g. node:20.19.0-alpine3.21, -- 2.49.1