feat(infra): bind-mount /import for backend mass-import endpoint #526

Merged
marcel merged 7 commits from feat/prod-import-bind-mount into main 2026-05-11 20:55:43 +02:00
6 changed files with 72 additions and 7 deletions

View File

@@ -73,8 +73,31 @@ jobs:
MAIL_SMTP_AUTH=false
MAIL_STARTTLS_ENABLE=false
APP_MAIL_FROM=noreply@staging.raddatz.cloud
IMPORT_HOST_DIR=/srv/familienarchiv-staging/import
EOF
- name: Verify backend /import:ro mount is wired
# Regression guard for #526: the /admin/system mass-import card
# only works when the backend service mounts the host import
# payload at /import (read-only). If a future "compose cleanup"
# PR drops the volumes block, mass import silently breaks again.
# `compose config` renders both shorthand and longform mounts as
# `target: /import` + `read_only: true`, so we assert against
# the rendered form rather than the raw source YAML.
run: |
set -e
docker compose \
-f docker-compose.prod.yml \
-p archiv-staging \
--env-file .env.staging \
--profile staging \
config > /tmp/compose-rendered.yml
grep -q '^[[:space:]]*target: /import$' /tmp/compose-rendered.yml \
|| { echo "::error::backend is missing the /import bind mount (see #526)"; exit 1; }
grep -A2 '^[[:space:]]*target: /import$' /tmp/compose-rendered.yml \
| grep -q 'read_only: true' \
|| { echo "::error::backend /import mount is not read-only (see #526)"; exit 1; }
- name: Build images
# `--pull` forces re-fetching pinned base images so a CVE
# re-publication of the same tag (e.g. node:20.19.0-alpine3.21,

View File

@@ -71,6 +71,7 @@ jobs:
MAIL_SMTP_AUTH=true
MAIL_STARTTLS_ENABLE=true
APP_MAIL_FROM=noreply@raddatz.cloud
IMPORT_HOST_DIR=/srv/familienarchiv-production/import
EOF
- name: Build images

View File

@@ -99,7 +99,9 @@ public class MassImportService {
@Value("${app.import.col.transcription:13}")
private int colTranscription;
private static final String IMPORT_DIR = "/import";
@Value("${app.import.dir:/import}")
private String importDir;
private static final DateTimeFormatter GERMAN_DATE = DateTimeFormatter.ofPattern("d. MMMM yyyy", Locale.GERMAN);
// ODS XML namespaces
@@ -129,7 +131,7 @@ public class MassImportService {
}
private File findSpreadsheetFile() throws IOException {
try (Stream<Path> files = Files.list(Paths.get(IMPORT_DIR))) {
try (Stream<Path> files = Files.list(Paths.get(importDir))) {
return files
.filter(p -> {
String name = p.toString().toLowerCase();
@@ -137,7 +139,7 @@ public class MassImportService {
})
.findFirst()
.orElseThrow(() -> new RuntimeException(
"Keine Tabellendatei (.ods/.xlsx/.xls) in " + IMPORT_DIR + " gefunden!"))
"Keine Tabellendatei (.ods/.xlsx/.xls) in " + importDir + " gefunden!"))
.toFile();
}
}
@@ -378,7 +380,7 @@ public class MassImportService {
}
private Optional<File> findFileRecursive(String filename) {
try (Stream<Path> walk = Files.walk(Paths.get(IMPORT_DIR))) {
try (Stream<Path> walk = Files.walk(Paths.get(importDir))) {
return walk.filter(p -> !Files.isDirectory(p))
.filter(p -> p.getFileName().toString().equals(filename))
.map(Path::toFile)

View File

@@ -50,6 +50,7 @@ class MassImportServiceTest {
void setUp() {
service = new MassImportService(documentService, personService, tagService, s3Client, thumbnailAsyncRunner);
ReflectionTestUtils.setField(service, "bucketName", "test-bucket");
ReflectionTestUtils.setField(service, "importDir", "/import");
ReflectionTestUtils.setField(service, "colIndex", 0);
ReflectionTestUtils.setField(service, "colBox", 1);
ReflectionTestUtils.setField(service, "colFolder", 2);
@@ -79,6 +80,19 @@ class MassImportServiceTest {
assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.FAILED);
}
@Test
void runImportAsync_readsFromConfiguredImportDir(@TempDir Path tempDir) {
// Empty temp dir → findSpreadsheetFile throws "no spreadsheet" with the
// configured path in the message. Proves the field, not a constant,
// drives the lookup.
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
service.runImportAsync();
assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.FAILED);
assertThat(service.getStatus().message()).contains(tempDir.toString());
}
@Test
void runImportAsync_throwsConflict_whenAlreadyRunning() {
MassImportService.ImportStatus running = new MassImportService.ImportStatus(

View File

@@ -26,6 +26,15 @@
# MAIL_HOST, MAIL_PORT, SMTP relay (production only; staging uses mailpit)
# MAIL_USERNAME, MAIL_PASSWORD
# APP_MAIL_FROM sender address (e.g. noreply@raddatz.cloud)
# IMPORT_HOST_DIR absolute host path holding ONLY the ODS
# spreadsheet and PDFs for /admin/system mass
# import — mounted read-only at /import inside
# the backend. Compose refuses to start when
# this var is unset, so staging and prod cannot
# accidentally share an import source. Must be
# readable by the backend container's UID
# (currently root via the OpenJDK image — any
# world-readable directory works).
networks:
archiv-net:
@@ -173,6 +182,12 @@ services:
# Bound to localhost only — Caddy fronts external traffic.
ports:
- "127.0.0.1:${PORT_BACKEND}:8080"
# Host path holding the ODS spreadsheet + PDFs for the mass-import endpoint.
# Read-only; MassImportService only reads (Files.list / Files.walk on /import).
# Required — no default — so staging and prod cannot accidentally share an
# import source. CI workflows pin this per-env (see .gitea/workflows/).
volumes:
- ${IMPORT_HOST_DIR:?Set IMPORT_HOST_DIR to a host path holding the mass-import payload (ODS + PDFs). See docs/DEPLOYMENT.md.}:/import:ro
environment:
SPRING_DATASOURCE_URL: jdbc:postgresql://db:5432/archiv
SPRING_DATASOURCE_USERNAME: archiv

View File

@@ -97,6 +97,7 @@ All vars are set in `.env` at the repo root (copy from `.env.example`). The back
| `APP_BASE_URL` | Public-facing URL for email links | `http://localhost:3000` | YES (prod) | — |
| `APP_OCR_BASE_URL` | Internal URL of the OCR service | — | YES | — |
| `APP_OCR_TRAINING_TOKEN` | Secret token for OCR training endpoints | — | YES (prod) | YES |
| `IMPORT_HOST_DIR` | Absolute host path holding the ODS spreadsheet + PDFs for the `/admin/system` mass-import card. Mounted read-only at `/import` inside the backend (compose-only — backend reads via `app.import.dir`). Compose refuses to start when unset, so staging and prod cannot accidentally share the source. Convention: `/srv/familienarchiv-staging/import` and `/srv/familienarchiv-production/import` | — | YES (prod compose) | — |
| `MAIL_HOST` | SMTP host | `mailpit` (dev) | YES (prod) | — |
| `MAIL_PORT` | SMTP port | `1025` (dev) | YES (prod) | — |
| `MAIL_USERNAME` | SMTP username | — | YES (prod) | YES |
@@ -329,9 +330,18 @@ bash scripts/download-kraken-models.sh
### Trigger a mass import (Excel/ODS)
1. Place the import file in the `import/` bind mount on the backend container.
2. Call `POST /api/admin/trigger-import` (requires `ADMIN` permission).
3. The import runs asynchronously — poll `GET /api/admin/import-status` or watch backend logs.
**Dev:** drop the ODS spreadsheet + PDFs into `./import/` at the repo root — the dev compose bind-mounts it to `/import` automatically.
**Staging/production:**
1. Pre-stage the payload on the host. Convention: `/srv/familienarchiv-staging/import/` or `/srv/familienarchiv-production/import/`.
```bash
rsync -avh --progress ./import/ user@host:/srv/familienarchiv-staging/import/
```
2. Make sure `IMPORT_HOST_DIR=<host-path>` is set in `.env.staging` / `.env.production` (the nightly/release workflows already write this — see §3). Compose refuses to start without it.
3. Redeploy the stack so the bind mount picks up — or, if the mount is already in place, skip to step 4.
4. Call `POST /api/admin/trigger-import` (requires `ADMIN` permission), or click the "Import starten" button on `/admin/system`.
5. The import runs asynchronously — poll `GET /api/admin/import-status`, watch `/admin/system`, or tail the backend logs.
---