diff --git a/.gitea/workflows/nightly.yml b/.gitea/workflows/nightly.yml index 814e6fc8..da11ebf7 100644 --- a/.gitea/workflows/nightly.yml +++ b/.gitea/workflows/nightly.yml @@ -73,8 +73,31 @@ jobs: MAIL_SMTP_AUTH=false MAIL_STARTTLS_ENABLE=false APP_MAIL_FROM=noreply@staging.raddatz.cloud + IMPORT_HOST_DIR=/srv/familienarchiv-staging/import EOF + - name: Verify backend /import:ro mount is wired + # Regression guard for #526: the /admin/system mass-import card + # only works when the backend service mounts the host import + # payload at /import (read-only). If a future "compose cleanup" + # PR drops the volumes block, mass import silently breaks again. + # `compose config` renders both shorthand and longform mounts as + # `target: /import` + `read_only: true`, so we assert against + # the rendered form rather than the raw source YAML. + run: | + set -e + docker compose \ + -f docker-compose.prod.yml \ + -p archiv-staging \ + --env-file .env.staging \ + --profile staging \ + config > /tmp/compose-rendered.yml + grep -q '^[[:space:]]*target: /import$' /tmp/compose-rendered.yml \ + || { echo "::error::backend is missing the /import bind mount (see #526)"; exit 1; } + grep -A2 '^[[:space:]]*target: /import$' /tmp/compose-rendered.yml \ + | grep -q 'read_only: true' \ + || { echo "::error::backend /import mount is not read-only (see #526)"; exit 1; } + - name: Build images # `--pull` forces re-fetching pinned base images so a CVE # re-publication of the same tag (e.g. 
node:20.19.0-alpine3.21, diff --git a/.gitea/workflows/release.yml b/.gitea/workflows/release.yml index 3831320f..7d2d3618 100644 --- a/.gitea/workflows/release.yml +++ b/.gitea/workflows/release.yml @@ -71,6 +71,7 @@ jobs: MAIL_SMTP_AUTH=true MAIL_STARTTLS_ENABLE=true APP_MAIL_FROM=noreply@raddatz.cloud + IMPORT_HOST_DIR=/srv/familienarchiv-production/import EOF - name: Build images diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java index e35d05cc..f58233ff 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java @@ -99,7 +99,9 @@ public class MassImportService { @Value("${app.import.col.transcription:13}") private int colTranscription; - private static final String IMPORT_DIR = "/import"; + @Value("${app.import.dir:/import}") + private String importDir; + private static final DateTimeFormatter GERMAN_DATE = DateTimeFormatter.ofPattern("d. 
MMMM yyyy", Locale.GERMAN); // ODS XML namespaces @@ -129,7 +131,7 @@ public class MassImportService { } private File findSpreadsheetFile() throws IOException { - try (Stream files = Files.list(Paths.get(IMPORT_DIR))) { + try (Stream files = Files.list(Paths.get(importDir))) { return files .filter(p -> { String name = p.toString().toLowerCase(); @@ -137,7 +139,7 @@ public class MassImportService { }) .findFirst() .orElseThrow(() -> new RuntimeException( - "Keine Tabellendatei (.ods/.xlsx/.xls) in " + IMPORT_DIR + " gefunden!")) + "Keine Tabellendatei (.ods/.xlsx/.xls) in " + importDir + " gefunden!")) .toFile(); } } @@ -378,7 +380,7 @@ public class MassImportService { } private Optional findFileRecursive(String filename) { - try (Stream walk = Files.walk(Paths.get(IMPORT_DIR))) { + try (Stream walk = Files.walk(Paths.get(importDir))) { return walk.filter(p -> !Files.isDirectory(p)) .filter(p -> p.getFileName().toString().equals(filename)) .map(Path::toFile) diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java index 7470f651..a5fe50c6 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java @@ -50,6 +50,7 @@ class MassImportServiceTest { void setUp() { service = new MassImportService(documentService, personService, tagService, s3Client, thumbnailAsyncRunner); ReflectionTestUtils.setField(service, "bucketName", "test-bucket"); + ReflectionTestUtils.setField(service, "importDir", "/import"); ReflectionTestUtils.setField(service, "colIndex", 0); ReflectionTestUtils.setField(service, "colBox", 1); ReflectionTestUtils.setField(service, "colFolder", 2); @@ -79,6 +80,19 @@ class MassImportServiceTest { assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.FAILED); } + @Test + void 
runImportAsync_readsFromConfiguredImportDir(@TempDir Path tempDir) { + // Empty temp dir → findSpreadsheetFile throws "no spreadsheet" with the + // configured path in the message. Proves the field, not a constant, + // drives the lookup. + ReflectionTestUtils.setField(service, "importDir", tempDir.toString()); + + service.runImportAsync(); + + assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.FAILED); + assertThat(service.getStatus().message()).contains(tempDir.toString()); + } + @Test void runImportAsync_throwsConflict_whenAlreadyRunning() { MassImportService.ImportStatus running = new MassImportService.ImportStatus( diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index e56ac956..e8687d45 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -26,6 +26,15 @@ # MAIL_HOST, MAIL_PORT, SMTP relay (production only; staging uses mailpit) # MAIL_USERNAME, MAIL_PASSWORD # APP_MAIL_FROM sender address (e.g. noreply@raddatz.cloud) +# IMPORT_HOST_DIR absolute host path holding ONLY the ODS +# spreadsheet and PDFs for /admin/system mass +# import — mounted read-only at /import inside +# the backend. Compose refuses to start when +# this var is unset, so staging and prod cannot +# accidentally share an import source. Must be +# readable by the backend container's UID +# (currently root via the OpenJDK image — any +# world-readable directory works). networks: archiv-net: @@ -173,6 +182,12 @@ services: # Bound to localhost only — Caddy fronts external traffic. ports: - "127.0.0.1:${PORT_BACKEND}:8080" + # Host path holding the ODS spreadsheet + PDFs for the mass-import endpoint. + # Read-only; MassImportService only reads (Files.list / Files.walk on /import). + # Required — no default — so staging and prod cannot accidentally share an + # import source. CI workflows pin this per-env (see .gitea/workflows/). + volumes: + - ${IMPORT_HOST_DIR:?Set IMPORT_HOST_DIR to a host path holding the mass-import payload (ODS + PDFs). 
See docs/DEPLOYMENT.md.}:/import:ro environment: SPRING_DATASOURCE_URL: jdbc:postgresql://db:5432/archiv SPRING_DATASOURCE_USERNAME: archiv diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index bd7b7a1a..a2dc55ca 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -97,6 +97,7 @@ All vars are set in `.env` at the repo root (copy from `.env.example`). The back | `APP_BASE_URL` | Public-facing URL for email links | `http://localhost:3000` | YES (prod) | — | | `APP_OCR_BASE_URL` | Internal URL of the OCR service | — | YES | — | | `APP_OCR_TRAINING_TOKEN` | Secret token for OCR training endpoints | — | YES (prod) | YES | +| `IMPORT_HOST_DIR` | Absolute host path holding the ODS spreadsheet + PDFs for the `/admin/system` mass-import card. Mounted read-only at `/import` inside the backend (compose-only — backend reads via `app.import.dir`). Compose refuses to start when unset, so staging and prod cannot accidentally share the source. Convention: `/srv/familienarchiv-staging/import` and `/srv/familienarchiv-production/import` | — | YES (prod compose) | — | | `MAIL_HOST` | SMTP host | `mailpit` (dev) | YES (prod) | — | | `MAIL_PORT` | SMTP port | `1025` (dev) | YES (prod) | — | | `MAIL_USERNAME` | SMTP username | — | YES (prod) | YES | @@ -329,9 +330,18 @@ bash scripts/download-kraken-models.sh ### Trigger a mass import (Excel/ODS) -1. Place the import file in the `import/` bind mount on the backend container. -2. Call `POST /api/admin/trigger-import` (requires `ADMIN` permission). -3. The import runs asynchronously — poll `GET /api/admin/import-status` or watch backend logs. +**Dev:** drop the ODS spreadsheet + PDFs into `./import/` at the repo root — the dev compose bind-mounts it to `/import` automatically. + +**Staging/production:** + +1. Pre-stage the payload on the host. Convention: `/srv/familienarchiv-staging/import/` or `/srv/familienarchiv-production/import/`. 
+ ```bash + rsync -avh --progress ./import/ user@host:/srv/familienarchiv-staging/import/ + ``` +2. Make sure `IMPORT_HOST_DIR=<absolute-host-path>` is set in `.env.staging` / `.env.production` (the nightly/release workflows already write this — see §3). Compose refuses to start without it. +3. Redeploy the stack so the bind mount takes effect — or, if the mount is already in place, skip to step 4. +4. Call `POST /api/admin/trigger-import` (requires `ADMIN` permission), or click the "Import starten" button on `/admin/system`. +5. The import runs asynchronously — poll `GET /api/admin/import-status`, watch `/admin/system`, or tail the backend logs. ---