feat(infra): bind-mount /import for backend mass-import endpoint #526
@@ -73,8 +73,31 @@ jobs:
|
|||||||
MAIL_SMTP_AUTH=false
|
MAIL_SMTP_AUTH=false
|
||||||
MAIL_STARTTLS_ENABLE=false
|
MAIL_STARTTLS_ENABLE=false
|
||||||
APP_MAIL_FROM=noreply@staging.raddatz.cloud
|
APP_MAIL_FROM=noreply@staging.raddatz.cloud
|
||||||
|
IMPORT_HOST_DIR=/srv/familienarchiv-staging/import
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
|
- name: Verify backend /import:ro mount is wired
|
||||||
|
# Regression guard for #526: the /admin/system mass-import card
|
||||||
|
# only works when the backend service mounts the host import
|
||||||
|
# payload at /import (read-only). If a future "compose cleanup"
|
||||||
|
# PR drops the volumes block, mass import silently breaks again.
|
||||||
|
# `compose config` renders both shorthand and longform mounts as
|
||||||
|
# `target: /import` + `read_only: true`, so we assert against
|
||||||
|
# the rendered form rather than the raw source YAML.
|
||||||
|
run: |
|
||||||
|
set -e
|
||||||
|
docker compose \
|
||||||
|
-f docker-compose.prod.yml \
|
||||||
|
-p archiv-staging \
|
||||||
|
--env-file .env.staging \
|
||||||
|
--profile staging \
|
||||||
|
config > /tmp/compose-rendered.yml
|
||||||
|
grep -q '^[[:space:]]*target: /import$' /tmp/compose-rendered.yml \
|
||||||
|
|| { echo "::error::backend is missing the /import bind mount (see #526)"; exit 1; }
|
||||||
|
grep -A2 '^[[:space:]]*target: /import$' /tmp/compose-rendered.yml \
|
||||||
|
| grep -q 'read_only: true' \
|
||||||
|
|| { echo "::error::backend /import mount is not read-only (see #526)"; exit 1; }
|
||||||
|
|
||||||
- name: Build images
|
- name: Build images
|
||||||
# `--pull` forces re-fetching pinned base images so a CVE
|
# `--pull` forces re-fetching pinned base images so a CVE
|
||||||
# re-publication of the same tag (e.g. node:20.19.0-alpine3.21,
|
# re-publication of the same tag (e.g. node:20.19.0-alpine3.21,
|
||||||
|
|||||||
@@ -71,6 +71,7 @@ jobs:
|
|||||||
MAIL_SMTP_AUTH=true
|
MAIL_SMTP_AUTH=true
|
||||||
MAIL_STARTTLS_ENABLE=true
|
MAIL_STARTTLS_ENABLE=true
|
||||||
APP_MAIL_FROM=noreply@raddatz.cloud
|
APP_MAIL_FROM=noreply@raddatz.cloud
|
||||||
|
IMPORT_HOST_DIR=/srv/familienarchiv-production/import
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
- name: Build images
|
- name: Build images
|
||||||
|
|||||||
@@ -99,7 +99,9 @@ public class MassImportService {
|
|||||||
@Value("${app.import.col.transcription:13}")
|
@Value("${app.import.col.transcription:13}")
|
||||||
private int colTranscription;
|
private int colTranscription;
|
||||||
|
|
||||||
private static final String IMPORT_DIR = "/import";
|
@Value("${app.import.dir:/import}")
|
||||||
|
private String importDir;
|
||||||
|
|
||||||
private static final DateTimeFormatter GERMAN_DATE = DateTimeFormatter.ofPattern("d. MMMM yyyy", Locale.GERMAN);
|
private static final DateTimeFormatter GERMAN_DATE = DateTimeFormatter.ofPattern("d. MMMM yyyy", Locale.GERMAN);
|
||||||
|
|
||||||
// ODS XML namespaces
|
// ODS XML namespaces
|
||||||
@@ -129,7 +131,7 @@ public class MassImportService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private File findSpreadsheetFile() throws IOException {
|
private File findSpreadsheetFile() throws IOException {
|
||||||
try (Stream<Path> files = Files.list(Paths.get(IMPORT_DIR))) {
|
try (Stream<Path> files = Files.list(Paths.get(importDir))) {
|
||||||
return files
|
return files
|
||||||
.filter(p -> {
|
.filter(p -> {
|
||||||
String name = p.toString().toLowerCase();
|
String name = p.toString().toLowerCase();
|
||||||
@@ -137,7 +139,7 @@ public class MassImportService {
|
|||||||
})
|
})
|
||||||
.findFirst()
|
.findFirst()
|
||||||
.orElseThrow(() -> new RuntimeException(
|
.orElseThrow(() -> new RuntimeException(
|
||||||
"Keine Tabellendatei (.ods/.xlsx/.xls) in " + IMPORT_DIR + " gefunden!"))
|
"Keine Tabellendatei (.ods/.xlsx/.xls) in " + importDir + " gefunden!"))
|
||||||
.toFile();
|
.toFile();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -378,7 +380,7 @@ public class MassImportService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private Optional<File> findFileRecursive(String filename) {
|
private Optional<File> findFileRecursive(String filename) {
|
||||||
try (Stream<Path> walk = Files.walk(Paths.get(IMPORT_DIR))) {
|
try (Stream<Path> walk = Files.walk(Paths.get(importDir))) {
|
||||||
return walk.filter(p -> !Files.isDirectory(p))
|
return walk.filter(p -> !Files.isDirectory(p))
|
||||||
.filter(p -> p.getFileName().toString().equals(filename))
|
.filter(p -> p.getFileName().toString().equals(filename))
|
||||||
.map(Path::toFile)
|
.map(Path::toFile)
|
||||||
|
|||||||
@@ -50,6 +50,7 @@ class MassImportServiceTest {
|
|||||||
void setUp() {
|
void setUp() {
|
||||||
service = new MassImportService(documentService, personService, tagService, s3Client, thumbnailAsyncRunner);
|
service = new MassImportService(documentService, personService, tagService, s3Client, thumbnailAsyncRunner);
|
||||||
ReflectionTestUtils.setField(service, "bucketName", "test-bucket");
|
ReflectionTestUtils.setField(service, "bucketName", "test-bucket");
|
||||||
|
ReflectionTestUtils.setField(service, "importDir", "/import");
|
||||||
ReflectionTestUtils.setField(service, "colIndex", 0);
|
ReflectionTestUtils.setField(service, "colIndex", 0);
|
||||||
ReflectionTestUtils.setField(service, "colBox", 1);
|
ReflectionTestUtils.setField(service, "colBox", 1);
|
||||||
ReflectionTestUtils.setField(service, "colFolder", 2);
|
ReflectionTestUtils.setField(service, "colFolder", 2);
|
||||||
@@ -79,6 +80,19 @@ class MassImportServiceTest {
|
|||||||
assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.FAILED);
|
assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.FAILED);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void runImportAsync_readsFromConfiguredImportDir(@TempDir Path tempDir) {
|
||||||
|
// Empty temp dir → findSpreadsheetFile throws "no spreadsheet" with the
|
||||||
|
// configured path in the message. Proves the field, not a constant,
|
||||||
|
// drives the lookup.
|
||||||
|
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
|
||||||
|
|
||||||
|
service.runImportAsync();
|
||||||
|
|
||||||
|
assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.FAILED);
|
||||||
|
assertThat(service.getStatus().message()).contains(tempDir.toString());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void runImportAsync_throwsConflict_whenAlreadyRunning() {
|
void runImportAsync_throwsConflict_whenAlreadyRunning() {
|
||||||
MassImportService.ImportStatus running = new MassImportService.ImportStatus(
|
MassImportService.ImportStatus running = new MassImportService.ImportStatus(
|
||||||
|
|||||||
@@ -26,6 +26,15 @@
|
|||||||
# MAIL_HOST, MAIL_PORT, SMTP relay (production only; staging uses mailpit)
|
# MAIL_HOST, MAIL_PORT, SMTP relay (production only; staging uses mailpit)
|
||||||
# MAIL_USERNAME, MAIL_PASSWORD
|
# MAIL_USERNAME, MAIL_PASSWORD
|
||||||
# APP_MAIL_FROM sender address (e.g. noreply@raddatz.cloud)
|
# APP_MAIL_FROM sender address (e.g. noreply@raddatz.cloud)
|
||||||
|
# IMPORT_HOST_DIR absolute host path holding ONLY the ODS
|
||||||
|
# spreadsheet and PDFs for /admin/system mass
|
||||||
|
# import — mounted read-only at /import inside
|
||||||
|
# the backend. Compose refuses to start when
|
||||||
|
# this var is unset, so staging and prod cannot
|
||||||
|
# accidentally share an import source. Must be
|
||||||
|
# readable by the backend container's UID
|
||||||
|
# (currently root via the OpenJDK image — any
|
||||||
|
# world-readable directory works).
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
archiv-net:
|
archiv-net:
|
||||||
@@ -173,6 +182,12 @@ services:
|
|||||||
# Bound to localhost only — Caddy fronts external traffic.
|
# Bound to localhost only — Caddy fronts external traffic.
|
||||||
ports:
|
ports:
|
||||||
- "127.0.0.1:${PORT_BACKEND}:8080"
|
- "127.0.0.1:${PORT_BACKEND}:8080"
|
||||||
|
# Host path holding the ODS spreadsheet + PDFs for the mass-import endpoint.
|
||||||
|
# Read-only; MassImportService only reads (Files.list / Files.walk on /import).
|
||||||
|
# Required — no default — so staging and prod cannot accidentally share an
|
||||||
|
# import source. CI workflows pin this per-env (see .gitea/workflows/).
|
||||||
|
volumes:
|
||||||
|
- ${IMPORT_HOST_DIR:?Set IMPORT_HOST_DIR to a host path holding the mass-import payload (ODS + PDFs). See docs/DEPLOYMENT.md.}:/import:ro
|
||||||
environment:
|
environment:
|
||||||
SPRING_DATASOURCE_URL: jdbc:postgresql://db:5432/archiv
|
SPRING_DATASOURCE_URL: jdbc:postgresql://db:5432/archiv
|
||||||
SPRING_DATASOURCE_USERNAME: archiv
|
SPRING_DATASOURCE_USERNAME: archiv
|
||||||
|
|||||||
@@ -97,6 +97,7 @@ All vars are set in `.env` at the repo root (copy from `.env.example`). The back
|
|||||||
| `APP_BASE_URL` | Public-facing URL for email links | `http://localhost:3000` | YES (prod) | — |
|
| `APP_BASE_URL` | Public-facing URL for email links | `http://localhost:3000` | YES (prod) | — |
|
||||||
| `APP_OCR_BASE_URL` | Internal URL of the OCR service | — | YES | — |
|
| `APP_OCR_BASE_URL` | Internal URL of the OCR service | — | YES | — |
|
||||||
| `APP_OCR_TRAINING_TOKEN` | Secret token for OCR training endpoints | — | YES (prod) | YES |
|
| `APP_OCR_TRAINING_TOKEN` | Secret token for OCR training endpoints | — | YES (prod) | YES |
|
||||||
|
| `IMPORT_HOST_DIR` | Absolute host path holding the ODS spreadsheet + PDFs for the `/admin/system` mass-import card. Mounted read-only at `/import` inside the backend. This variable is consumed by Compose only; the backend locates the mounted directory via `app.import.dir` (default `/import`). Compose refuses to start when unset, so staging and prod cannot accidentally share the source. Convention: `/srv/familienarchiv-staging/import` and `/srv/familienarchiv-production/import` | — | YES (prod compose) | — |
|
||||||
| `MAIL_HOST` | SMTP host | `mailpit` (dev) | YES (prod) | — |
|
| `MAIL_HOST` | SMTP host | `mailpit` (dev) | YES (prod) | — |
|
||||||
| `MAIL_PORT` | SMTP port | `1025` (dev) | YES (prod) | — |
|
| `MAIL_PORT` | SMTP port | `1025` (dev) | YES (prod) | — |
|
||||||
| `MAIL_USERNAME` | SMTP username | — | YES (prod) | YES |
|
| `MAIL_USERNAME` | SMTP username | — | YES (prod) | YES |
|
||||||
@@ -329,9 +330,18 @@ bash scripts/download-kraken-models.sh
|
|||||||
|
|
||||||
### Trigger a mass import (Excel/ODS)
|
### Trigger a mass import (Excel/ODS)
|
||||||
|
|
||||||
1. Place the import file in the `import/` bind mount on the backend container.
|
**Dev:** drop the ODS spreadsheet + PDFs into `./import/` at the repo root — the dev compose bind-mounts it to `/import` automatically.
|
||||||
2. Call `POST /api/admin/trigger-import` (requires `ADMIN` permission).
|
|
||||||
3. The import runs asynchronously — poll `GET /api/admin/import-status` or watch backend logs.
|
**Staging/production:**
|
||||||
|
|
||||||
|
1. Pre-stage the payload on the host. Convention: `/srv/familienarchiv-staging/import/` or `/srv/familienarchiv-production/import/`.
|
||||||
|
```bash
|
||||||
|
rsync -avh --progress ./import/ user@host:/srv/familienarchiv-staging/import/
|
||||||
|
```
|
||||||
|
2. Make sure `IMPORT_HOST_DIR=<host-path>` is set in `.env.staging` / `.env.production` (the nightly/release workflows already write this — see §3). Compose refuses to start without it.
|
||||||
|
3. Redeploy the stack so the bind mount takes effect — or, if the mount is already in place, skip to step 4.
|
||||||
|
4. Call `POST /api/admin/trigger-import` (requires `ADMIN` permission), or click the "Import starten" button on `/admin/system`.
|
||||||
|
5. The import runs asynchronously — poll `GET /api/admin/import-status`, watch `/admin/system`, or tail the backend logs.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user