feat(importing): rebuild importer as modular canonical loaders (Phase 3, #669) #674

Merged
marcel merged 19 commits from feature/669-modular-importer into docs/import-migration 2026-05-27 11:34:29 +02:00
39 changed files with 2712 additions and 1470 deletions

View File

@@ -87,7 +87,7 @@ backend/src/main/java/org/raddatz/familienarchiv/
├── exception/ DomainException, ErrorCode, GlobalExceptionHandler ├── exception/ DomainException, ErrorCode, GlobalExceptionHandler
├── filestorage/ FileService (S3/MinIO) ├── filestorage/ FileService (S3/MinIO)
├── geschichte/ Geschichte (story) domain ├── geschichte/ Geschichte (story) domain
├── importing/ MassImportService ├── importing/ CanonicalImportOrchestrator + four loaders (TagTree/PersonRegister/PersonTree/Document) + CanonicalSheetReader
├── notification/ Notification domain + SseEmitterRegistry ├── notification/ Notification domain + SseEmitterRegistry
├── ocr/ OCR domain — OcrService, OcrBatchService, training ├── ocr/ OCR domain — OcrService, OcrBatchService, training
├── person/ Person domain ├── person/ Person domain

View File

@@ -34,7 +34,7 @@ src/main/java/org/raddatz/familienarchiv/
├── exception/ # DomainException, ErrorCode, GlobalExceptionHandler ├── exception/ # DomainException, ErrorCode, GlobalExceptionHandler
├── filestorage/ # FileService (S3/MinIO) ├── filestorage/ # FileService (S3/MinIO)
├── geschichte/ # Geschichte (story) domain ├── geschichte/ # Geschichte (story) domain
├── importing/ # MassImportService ├── importing/ # CanonicalImportOrchestrator + 4 loaders + CanonicalSheetReader
├── notification/ # Notification domain + SseEmitterRegistry ├── notification/ # Notification domain + SseEmitterRegistry
├── ocr/ # OCR domain — OcrService, OcrBatchService, training ├── ocr/ # OCR domain — OcrService, OcrBatchService, training
├── person/ # Person domain — Person, PersonService, PersonController ├── person/ # Person domain — Person, PersonService, PersonController

View File

@@ -40,6 +40,8 @@ public enum ErrorCode {
// --- Import --- // --- Import ---
/** A mass import is already in progress; only one can run at a time. 409 */ /** A mass import is already in progress; only one can run at a time. 409 */
IMPORT_ALREADY_RUNNING, IMPORT_ALREADY_RUNNING,
/** A canonical import artifact is missing, unreadable, or missing a required header. 400 */
IMPORT_ARTIFACT_INVALID,
// --- Thumbnails --- // --- Thumbnails ---
/** A thumbnail backfill is already in progress; only one can run at a time. 409 */ /** A thumbnail backfill is already in progress; only one can run at a time. 409 */

View File

@@ -0,0 +1,94 @@
package org.raddatz.familienarchiv.importing;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.raddatz.familienarchiv.exception.DomainException;
import org.raddatz.familienarchiv.exception.ErrorCode;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import java.io.File;
import java.time.LocalDateTime;
import java.util.List;
/**
* Runs the four canonical loaders in their real dependency order — encoded explicitly
* here, not implied by call order — and owns the async runner plus the {@link ImportStatus}
* state machine the admin UI consumes. The orchestrator smoke-checks that all four
* artifacts are present before starting, failing fast rather than half-loading tags but no
* documents. A malformed artifact (a loader throwing) sets {@code FAILED}; an individual
* bad file is surfaced through the {@link ImportStatus.SkippedFile} mechanism instead.
*/
@Service
@RequiredArgsConstructor
@Slf4j
public class CanonicalImportOrchestrator {
private static final String TAG_TREE_ARTIFACT = "canonical-tag-tree.xlsx";
private static final String PERSONS_ARTIFACT = "canonical-persons.xlsx";
private static final String PERSONS_TREE_ARTIFACT = "canonical-persons-tree.json";
private static final String DOCUMENTS_ARTIFACT = "canonical-documents.xlsx";
private final TagTreeImporter tagTreeImporter;
private final PersonRegisterImporter personRegisterImporter;
private final PersonTreeImporter personTreeImporter;
private final DocumentImporter documentImporter;
@Value("${app.import.dir:/import}")
private String canonicalDir;
private volatile ImportStatus currentStatus = new ImportStatus(
ImportStatus.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null);
public ImportStatus getStatus() {
return currentStatus;
}
@Async
public void runImportAsync() {
if (currentStatus.state() == ImportStatus.State.RUNNING) {
throw DomainException.conflict(ErrorCode.IMPORT_ALREADY_RUNNING, "A mass import is already in progress");
}
runImport();
}
/** Synchronous entry point — wrapped by {@link #runImportAsync()} and called directly in tests. */
void runImport() {
currentStatus = new ImportStatus(ImportStatus.State.RUNNING, "IMPORT_RUNNING",
"Import läuft...", 0, List.of(), LocalDateTime.now());
try {
File tagTree = requireArtifact(TAG_TREE_ARTIFACT);
File persons = requireArtifact(PERSONS_ARTIFACT);
File personsTree = requireArtifact(PERSONS_TREE_ARTIFACT);
File documents = requireArtifact(DOCUMENTS_ARTIFACT);
// Dependency DAG: documents need persons + tags; the tree needs persons.
tagTreeImporter.load(tagTree);
personRegisterImporter.load(persons);
personTreeImporter.load(personsTree);
DocumentImporter.LoadResult result = documentImporter.load(documents);
currentStatus = new ImportStatus(ImportStatus.State.DONE, "IMPORT_DONE",
"Import abgeschlossen. " + result.processed() + " Dokumente verarbeitet.",
result.processed(), result.skippedFiles(), currentStatus.startedAt());
} catch (DomainException e) {
log.error("Canonical import failed: {}", e.getMessage());
currentStatus = new ImportStatus(ImportStatus.State.FAILED, "IMPORT_FAILED_ARTIFACT",
"Fehler: " + e.getMessage(), 0, List.of(), currentStatus.startedAt());
} catch (Exception e) {
log.error("Canonical import failed", e);
currentStatus = new ImportStatus(ImportStatus.State.FAILED, "IMPORT_FAILED_INTERNAL",
"Fehler: " + e.getMessage(), 0, List.of(), currentStatus.startedAt());
}
}
private File requireArtifact(String name) {
File artifact = new File(canonicalDir, name);
if (!artifact.isFile()) {
throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
"Missing canonical artifact: " + name);
}
return artifact;
}
}

View File

@@ -0,0 +1,133 @@
package org.raddatz.familienarchiv.importing;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.raddatz.familienarchiv.exception.DomainException;
import org.raddatz.familienarchiv.exception.ErrorCode;
import java.io.File;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Value-level POI helper for the canonical import artifacts. No Spring, no domain
* knowledge: it opens a workbook, maps the header row to column indices by name, and
* yields typed rows whose cells are looked up by header name — the seam that replaces
* the old positional {@code @Value app.import.col.*} indices. List columns are split on
* the pipe delimiter the normalizer emits.
*/
public final class CanonicalSheetReader {
private CanonicalSheetReader() {
}
/** A single data row, addressable by canonical header name (never by index). */
public static final class Row {
private final Map<String, Integer> headerIndex;
private final List<String> cells;
private Row(Map<String, Integer> headerIndex, List<String> cells) {
this.headerIndex = headerIndex;
this.cells = cells;
}
/** Trimmed cell value for the named header, or "" when absent/blank. */
public String get(String header) {
Integer index = headerIndex.get(header);
if (index == null || index >= cells.size()) return "";
String value = cells.get(index);
return value == null ? "" : value.trim();
}
}
/**
* Reads all data rows from the first sheet, validating that every required header is
* present. Throws a fail-closed {@link DomainException} on a missing header so a
* loader never silently maps the wrong column.
*/
public static List<Row> readRows(File file, List<String> requiredHeaders) {
try (FileInputStream fis = new FileInputStream(file);
Workbook workbook = WorkbookFactory.create(fis)) {
Sheet sheet = workbook.getSheetAt(0);
org.apache.poi.ss.usermodel.Row headerRow = sheet.getRow(sheet.getFirstRowNum());
Map<String, Integer> headerIndex = mapHeaders(headerRow);
requireHeaders(file, headerIndex, requiredHeaders);
List<Row> rows = new ArrayList<>();
for (int i = sheet.getFirstRowNum() + 1; i <= sheet.getLastRowNum(); i++) {
org.apache.poi.ss.usermodel.Row poiRow = sheet.getRow(i);
if (poiRow == null) continue;
rows.add(new Row(headerIndex, readCells(poiRow, headerIndex.size())));
}
return rows;
} catch (DomainException e) {
throw e;
} catch (Exception e) {
throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
"Unreadable canonical artifact: " + file.getName());
}
}
/** Splits a pipe-delimited list column into trimmed, non-empty segments. */
public static List<String> splitList(String raw) {
if (raw == null || raw.isBlank()) return List.of();
return Arrays.stream(raw.split("\\|"))
.map(String::trim)
.filter(s -> !s.isEmpty())
.toList();
}
private static Map<String, Integer> mapHeaders(org.apache.poi.ss.usermodel.Row headerRow) {
if (headerRow == null) {
return Map.of();
}
Map<String, Integer> headerIndex = new HashMap<>();
for (int c = 0; c < headerRow.getLastCellNum(); c++) {
String name = cellToString(headerRow.getCell(c)).trim();
if (!name.isEmpty()) headerIndex.putIfAbsent(name, c);
}
return headerIndex;
}
private static void requireHeaders(File file, Map<String, Integer> headerIndex, List<String> requiredHeaders) {
for (String header : requiredHeaders) {
if (!headerIndex.containsKey(header)) {
throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
"Missing required header '" + header + "' in artifact " + file.getName());
}
}
}
private static List<String> readCells(org.apache.poi.ss.usermodel.Row poiRow, int columnCount) {
int width = Math.max(columnCount, poiRow.getLastCellNum());
List<String> cells = new ArrayList<>(width);
for (int c = 0; c < width; c++) {
cells.add(cellToString(poiRow.getCell(c)));
}
return cells;
}
private static String cellToString(Cell cell) {
if (cell == null) return "";
return switch (cell.getCellType()) {
case STRING -> cell.getStringCellValue();
case NUMERIC -> {
if (DateUtil.isCellDateFormatted(cell)) {
yield cell.getLocalDateTimeCellValue().toLocalDate().toString();
}
yield String.valueOf((long) cell.getNumericCellValue());
}
case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
default -> "";
};
}
}

View File

@@ -0,0 +1,334 @@
package org.raddatz.familienarchiv.importing;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.raddatz.familienarchiv.document.DatePrecision;
import org.raddatz.familienarchiv.document.Document;
import org.raddatz.familienarchiv.document.DocumentService;
import org.raddatz.familienarchiv.document.DocumentStatus;
import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
import org.raddatz.familienarchiv.exception.DomainException;
import org.raddatz.familienarchiv.exception.ErrorCode;
import org.raddatz.familienarchiv.person.Person;
import org.raddatz.familienarchiv.person.PersonService;
import org.raddatz.familienarchiv.person.PersonType;
import org.raddatz.familienarchiv.person.PersonUpsertCommand;
import org.raddatz.familienarchiv.tag.Tag;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Transactional;
import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import org.raddatz.familienarchiv.tag.TagService;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDate;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Stream;
/**
* Loads {@code canonical-documents.xlsx} into the document domain. Java performs no
* semantic transformation: the normalizer already resolved people to slugs and dates to
* ISO values. This loader maps columns by header name, routes each attribution
* register-first (always retaining the raw cell in {@code sender_text}/{@code receiver_text}),
* parses clean dates, and keeps the file/S3/thumbnail plumbing.
*
* <p>The {@code file} value is hostile input regardless of upstream trust (CWE-22 does not
* care that it came from our Python tool): its basename is validated with
* {@link #isValidImportFilename} and then resolved with canonical-path containment in
* {@link #findFileRecursive}.
*/
@Component
@RequiredArgsConstructor
@Slf4j
public class DocumentImporter {
static final List<String> REQUIRED_HEADERS = List.of(
"index", "file", "sender_person_id", "sender_name",
"receiver_person_ids", "receiver_names", "date_iso", "date_raw", "date_precision");
private final DocumentService documentService;
private final PersonService personService;
private final TagService tagService;
private final S3Client s3Client;
private final ThumbnailAsyncRunner thumbnailAsyncRunner;
@Value("${app.s3.bucket:familienarchiv}")
private String bucketName;
@Value("${app.import.dir:/import}")
private String importDir;
/** Outcome of loading the document sheet: processed count + per-file skips. */
public record LoadResult(int processed, List<ImportStatus.SkippedFile> skippedFiles) {}
// One transaction for the whole sheet keeps the Hibernate session open so an existing
// document's lazy receivers collection initialises during an idempotent re-import.
// Invoked cross-bean from the orchestrator, so the @Transactional proxy applies.
@Transactional
public LoadResult load(File artifact) {
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(artifact, REQUIRED_HEADERS);
int processed = 0;
List<ImportStatus.SkippedFile> skipped = new ArrayList<>();
for (CanonicalSheetReader.Row row : rows) {
String index = row.get("index");
if (index.isBlank()) continue;
Optional<ImportStatus.SkipReason> skipReason = importRow(row, index, skipped);
if (skipReason.isPresent()) {
skipped.add(new ImportStatus.SkippedFile(displayName(row, index), skipReason.get()));
} else {
processed++;
}
}
log.info("Imported {} documents from {} ({} skipped)", processed, artifact.getName(), skipped.size());
return new LoadResult(processed, skipped);
}
private Optional<ImportStatus.SkipReason> importRow(CanonicalSheetReader.Row row, String index,
List<ImportStatus.SkippedFile> skipped) {
Optional<File> resolved;
try {
resolved = resolveFile(row.get("file"));
} catch (InvalidImportFilenameException e) {
log.warn("Skipping import row {}: filename rejected", index);
return Optional.of(ImportStatus.SkipReason.INVALID_FILENAME_PATH_TRAVERSAL);
}
if (resolved.isPresent()) {
try {
if (!isPdfMagicBytes(resolved.get())) {
return Optional.of(ImportStatus.SkipReason.INVALID_PDF_SIGNATURE);
}
} catch (IOException e) {
log.error("Magic-byte check failed for row {}", index, e);
return Optional.of(ImportStatus.SkipReason.FILE_READ_ERROR);
}
}
return persist(row, index, resolved);
}
private Optional<ImportStatus.SkipReason> persist(CanonicalSheetReader.Row row, String index, Optional<File> file) {
Document existing = documentService.findByOriginalFilename(index).orElse(null);
if (existing != null && existing.getStatus() != DocumentStatus.PLACEHOLDER) {
return Optional.of(ImportStatus.SkipReason.ALREADY_EXISTS);
}
String s3Key = null;
String contentType = null;
DocumentStatus status = DocumentStatus.PLACEHOLDER;
if (file.isPresent()) {
contentType = probeContentType(file.get());
s3Key = "documents/" + UUID.randomUUID() + "_" + file.get().getName();
try {
uploadToS3(file.get(), s3Key, contentType);
status = DocumentStatus.UPLOADED;
} catch (Exception e) {
log.error("S3 upload failed for {}", file.get().getName(), e);
return Optional.of(ImportStatus.SkipReason.S3_UPLOAD_FAILED);
}
}
Document doc = buildDocument(row, index, existing, s3Key, contentType, status);
Document saved = documentService.save(doc);
if (file.isPresent()) {
thumbnailAsyncRunner.dispatchAfterCommit(saved.getId());
}
return Optional.empty();
}
private Document buildDocument(CanonicalSheetReader.Row row, String index, Document existing,
String s3Key, String contentType, DocumentStatus status) {
Document doc = existing != null ? existing
: Document.builder().originalFilename(index).build();
String senderName = row.get("sender_name");
String receiverNames = row.get("receiver_names");
Person sender = resolveSender(row.get("sender_person_id"), senderName);
Set<Person> receivers = resolveReceivers(row.get("receiver_person_ids"));
doc.setTitle(index);
doc.setStatus(status);
doc.setFilePath(s3Key);
doc.setContentType(contentType);
doc.setSender(sender);
doc.setSenderText(blankToNull(senderName));
// The canonical row is authoritative for receivers/tags (ADR-025): clear then
// re-populate so a shrunk set on re-import prunes stale links rather than
// accumulating them. The raw sender_text/receiver_text retention is separate.
doc.getReceivers().clear();
doc.getReceivers().addAll(receivers);
doc.setReceiverText(blankToNull(receiverNames));
doc.setDocumentDate(parseIsoDate(row.get("date_iso")));
doc.setMetaDatePrecision(parsePrecision(row.get("date_precision")));
doc.setMetaDateEnd(parseIsoDate(row.get("date_end")));
doc.setMetaDateRaw(blankToNull(row.get("date_raw")));
doc.setLocation(blankToNull(row.get("location")));
doc.setSummary(blankToNull(row.get("summary")));
attachTag(doc, row.get("tags"));
doc.setMetadataComplete(doc.getDocumentDate() != null || sender != null || !receivers.isEmpty());
return doc;
}
// ─── attribution routing — register-first, always retain raw ─────────────────────
private Person resolveSender(String slug, String rawName) {
if (slug.isBlank()) return null;
return resolvePerson(slug, rawName);
}
private Set<Person> resolveReceivers(String slugs) {
Set<Person> receivers = new LinkedHashSet<>();
for (String slug : CanonicalSheetReader.splitList(slugs)) {
receivers.add(resolvePerson(slug, slug));
}
return receivers;
}
private Person resolvePerson(String slug, String rawName) {
return personService.findBySourceRef(slug)
.orElseGet(() -> personService.upsertBySourceRef(PersonUpsertCommand.builder()
.sourceRef(slug)
.lastName(blankToNull(rawName) == null ? slug : rawName)
.personType(PersonType.PERSON)
.provisional(true)
.build()));
}
// Authoritative: the canonical row defines the document's tags exactly. Clearing first
// means a tag removed from the row is pruned on re-import (ADR-025).
private void attachTag(Document doc, String tagPath) {
doc.getTags().clear();
if (tagPath.isBlank()) return;
tagService.findBySourceRef(tagPath).ifPresent(tag -> doc.getTags().add(tag));
}
// ─── clean-value parsing (no semantic logic) ─────────────────────────────────────
private static LocalDate parseIsoDate(String value) {
if (value == null || value.isBlank()) return null;
try {
return LocalDate.parse(value.trim());
} catch (DateTimeParseException e) {
return null;
}
}
private static DatePrecision parsePrecision(String value) {
if (value == null || value.isBlank()) return DatePrecision.UNKNOWN;
try {
return DatePrecision.valueOf(value.trim());
} catch (IllegalArgumentException e) {
return DatePrecision.UNKNOWN;
}
}
// ─── file handling + S3 (small ≤20-line methods) ─────────────────────────────────
private Optional<File> resolveFile(String fileColumn) {
if (fileColumn == null || fileColumn.isBlank()) return Optional.empty();
String basename = basenameOf(fileColumn);
if (!isValidImportFilename(basename)) {
throw new InvalidImportFilenameException();
}
return findFileRecursive(basename);
}
private static String basenameOf(String fileColumn) {
String normalized = fileColumn.replace('\\', '/');
int lastSlash = normalized.lastIndexOf('/');
return lastSlash < 0 ? normalized.trim() : normalized.substring(lastSlash + 1).trim();
}
private String probeContentType(File file) {
try {
String probed = Files.probeContentType(file.toPath());
return probed != null ? probed : "application/octet-stream";
} catch (IOException e) {
return "application/octet-stream";
}
}
private void uploadToS3(File file, String s3Key, String contentType) {
s3Client.putObject(PutObjectRequest.builder()
.bucket(bucketName)
.key(s3Key)
.contentType(contentType)
.build(),
RequestBody.fromFile(file));
}
// ─── security guards — ported verbatim from MassImportService — do not weaken ────
private boolean isValidImportFilename(String filename) {
if (filename == null || filename.isBlank()) return false;
if (filename.contains("/")) return false;
if (filename.contains("\\")) return false;
if (filename.contains("")) return false; // U+2215 DIVISION SLASH
if (filename.contains("")) return false; // U+FF0F FULLWIDTH SOLIDUS
if (filename.contains("")) return false; // U+29F5 REVERSE SOLIDUS OPERATOR
if (filename.contains("..")) return false;
if (filename.equals(".")) return false;
if (filename.contains("\0")) return false;
if (Paths.get(filename).isAbsolute()) return false;
return true;
}
// package-private: a Mockito spy in tests can override to inject IOException
InputStream openFileStream(File file) throws IOException {
return new FileInputStream(file);
}
private boolean isPdfMagicBytes(File file) throws IOException {
try (InputStream is = openFileStream(file)) {
byte[] header = is.readNBytes(4);
return header.length == 4
&& header[0] == 0x25 // %
&& header[1] == 0x50 // P
&& header[2] == 0x44 // D
&& header[3] == 0x46; // F
}
}
private Optional<File> findFileRecursive(String filename) {
File baseDir = new File(importDir);
try (Stream<Path> walk = Files.walk(baseDir.toPath())) {
Optional<Path> match = walk.filter(p -> !Files.isDirectory(p))
.filter(p -> p.getFileName().toString().equals(filename))
.findFirst();
if (match.isEmpty()) return Optional.empty();
File candidate = match.get().toFile();
String baseDirCanonical = baseDir.getCanonicalPath();
if (!candidate.getCanonicalPath().startsWith(baseDirCanonical + File.separator)) {
throw DomainException.internal(ErrorCode.INTERNAL_ERROR, "Path escape detected: " + candidate);
}
return Optional.of(candidate);
} catch (IOException e) {
return Optional.empty();
}
}
private static String displayName(CanonicalSheetReader.Row row, String index) {
String file = row.get("file");
return file.isBlank() ? index : basenameOf(file);
}
private static String blankToNull(String s) {
return (s == null || s.isBlank()) ? null : s;
}
private static final class InvalidImportFilenameException extends RuntimeException {
}
}

View File

@@ -0,0 +1,50 @@
package org.raddatz.familienarchiv.importing;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.swagger.v3.oas.annotations.media.Schema;
import java.time.LocalDateTime;
import java.util.List;
/**
* Async import state surfaced to {@code admin/system/ImportStatusCard.svelte} via the
* generated types. The shape ({@code state, statusCode, processed, skippedFiles, skipped})
* is kept verbatim from the retired MassImportService so the admin UI keeps working.
*/
public record ImportStatus(
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) State state,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) String statusCode,
@JsonIgnore String message,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) int processed,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) List<SkippedFile> skippedFiles,
LocalDateTime startedAt
) {
public enum State { IDLE, RUNNING, DONE, FAILED }
public enum SkipReason {
INVALID_FILENAME_PATH_TRAVERSAL,
INVALID_PDF_SIGNATURE,
FILE_READ_ERROR,
ALREADY_EXISTS,
S3_UPLOAD_FAILED
}
public record SkippedFile(
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) String filename,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) SkipReason reason
) {}
// Note: @Schema on a record accessor method is not picked up by SpringDoc; the
// "skipped" count is a computed convenience field derived from skippedFiles.size().
@JsonProperty("skipped")
public int skipped() {
return skippedFiles.size();
}
/** Defensive-copy constructor — callers cannot mutate the stored list after construction. */
public ImportStatus {
skippedFiles = List.copyOf(skippedFiles);
}
}

View File

@@ -1,509 +0,0 @@
package org.raddatz.familienarchiv.importing;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.ss.usermodel.*;
import java.util.Objects;
import org.raddatz.familienarchiv.exception.DomainException;
import org.raddatz.familienarchiv.exception.ErrorCode;
import org.raddatz.familienarchiv.document.Document;
import org.raddatz.familienarchiv.document.DocumentService;
import org.raddatz.familienarchiv.document.DocumentStatus;
import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
import org.raddatz.familienarchiv.person.Person;
import org.raddatz.familienarchiv.tag.Tag;
import org.raddatz.familienarchiv.person.Person;
import org.raddatz.familienarchiv.person.PersonNameParser;
import org.raddatz.familienarchiv.person.PersonService;
import org.raddatz.familienarchiv.tag.TagService;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.UUID;
import java.util.stream.Stream;
import java.util.zip.ZipFile;
@Service
@RequiredArgsConstructor
@Slf4j
public class MassImportService {
public enum State { IDLE, RUNNING, DONE, FAILED }
public enum SkipReason {
INVALID_FILENAME_PATH_TRAVERSAL,
INVALID_PDF_SIGNATURE,
FILE_READ_ERROR,
ALREADY_EXISTS,
S3_UPLOAD_FAILED
}
public record SkippedFile(
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) String filename,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) SkipReason reason
) {}
public record ImportStatus(
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) State state,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) String statusCode,
@JsonIgnore String message,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) int processed,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) List<SkippedFile> skippedFiles,
LocalDateTime startedAt
) {
// Note: @Schema on a record accessor method is not picked up by SpringDoc; the
// "skipped" count is a computed convenience field derived from skippedFiles.size().
@JsonProperty("skipped")
public int skipped() { return skippedFiles.size(); }
/** Defensive-copy constructor — callers cannot mutate the stored list after construction. */
public ImportStatus {
skippedFiles = List.copyOf(skippedFiles);
}
}
record ProcessResult(int processed, List<SkippedFile> skippedFiles) {}
private volatile ImportStatus currentStatus = new ImportStatus(State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null);
public ImportStatus getStatus() {
return currentStatus;
}
private final DocumentService documentService;
private final PersonService personService;
private final TagService tagService;
private final S3Client s3Client;
private final ThumbnailAsyncRunner thumbnailAsyncRunner;
@Value("${app.s3.bucket}")
private String bucketName;
@Value("${app.import.col.index:0}")
private int colIndex;
@Value("${app.import.col.box:1}")
private int colBox;
@Value("${app.import.col.folder:2}")
private int colFolder;
@Value("${app.import.col.sender:3}")
private int colSender;
@Value("${app.import.col.receivers:5}")
private int colReceivers;
@Value("${app.import.col.date:7}")
private int colDate;
@Value("${app.import.col.location:9}")
private int colLocation;
@Value("${app.import.col.tags:10}")
private int colTags;
@Value("${app.import.col.summary:11}")
private int colSummary;
@Value("${app.import.col.transcription:13}")
private int colTranscription;
@Value("${app.import.dir:/import}")
private String importDir;
private static final DateTimeFormatter GERMAN_DATE = DateTimeFormatter.ofPattern("d. MMMM yyyy", Locale.GERMAN);
// ODS XML namespaces
private static final String NS_TABLE = "urn:oasis:names:tc:opendocument:xmlns:table:1.0";
private static final String NS_TEXT = "urn:oasis:names:tc:opendocument:xmlns:text:1.0";
// We only need up to this many columns; caps repeated-empty-cell expansion
private static final int MAX_COLS = 20;
@Async
public void runImportAsync() {
if (currentStatus.state() == State.RUNNING) {
throw DomainException.conflict(ErrorCode.IMPORT_ALREADY_RUNNING, "A mass import is already in progress");
}
currentStatus = new ImportStatus(State.RUNNING, "IMPORT_RUNNING", "Import läuft...", 0, List.of(), LocalDateTime.now());
try {
File spreadsheet = findSpreadsheetFile();
log.info("Starte Massenimport aus: {}", spreadsheet.getAbsolutePath());
ProcessResult result = processRows(readSpreadsheet(spreadsheet));
currentStatus = new ImportStatus(State.DONE, "IMPORT_DONE",
"Import abgeschlossen. " + result.processed() + " Dokumente verarbeitet.",
result.processed(), result.skippedFiles(), currentStatus.startedAt());
} catch (NoSpreadsheetException e) {
log.error("Massenimport fehlgeschlagen: keine Tabellendatei", e);
currentStatus = new ImportStatus(State.FAILED, "IMPORT_FAILED_NO_SPREADSHEET",
"Fehler: " + e.getMessage(), 0, List.of(), currentStatus.startedAt());
} catch (Exception e) {
log.error("Massenimport fehlgeschlagen", e);
currentStatus = new ImportStatus(State.FAILED, "IMPORT_FAILED_INTERNAL",
"Fehler: " + e.getMessage(), 0, List.of(), currentStatus.startedAt());
}
}
private static class NoSpreadsheetException extends RuntimeException {
NoSpreadsheetException(String message) { super(message); }
}
private File findSpreadsheetFile() throws IOException {
try (Stream<Path> files = Files.list(Paths.get(importDir))) {
return files
.filter(p -> {
String name = p.toString().toLowerCase();
return name.endsWith(".ods") || name.endsWith(".xlsx") || name.endsWith(".xls");
})
.findFirst()
.orElseThrow(() -> new NoSpreadsheetException(
"Keine Tabellendatei (.ods/.xlsx/.xls) in " + importDir + " gefunden!"))
.toFile();
}
}
// --- Spreadsheet reading (format-specific, produces neutral List<List<String>>) ---
private List<List<String>> readSpreadsheet(File file) throws Exception {
String name = file.getName().toLowerCase();
if (name.endsWith(".ods")) {
return readOds(file);
}
return readXlsx(file);
}
/**
* Reads an ODS file by parsing its content.xml directly (no extra library needed).
* ODS is a ZIP archive; content.xml holds the spreadsheet data as XML.
*/
List<List<String>> readOds(File file) throws Exception {
List<List<String>> result = new ArrayList<>();
try (ZipFile zip = new ZipFile(file)) {
var entry = zip.getEntry("content.xml");
if (entry == null) throw new RuntimeException("Ungültige ODS-Datei: content.xml fehlt");
var factory = XxeSafeXmlParser.hardenedFactory();
factory.setNamespaceAware(true);
var builder = factory.newDocumentBuilder();
var doc = builder.parse(zip.getInputStream(entry));
NodeList tables = doc.getElementsByTagNameNS(NS_TABLE, "table");
if (tables.getLength() == 0) return result;
var table = (Element) tables.item(0);
NodeList rows = table.getElementsByTagNameNS(NS_TABLE, "table-row");
for (int i = 0; i < rows.getLength(); i++) {
var row = (Element) rows.item(i);
List<String> rowData = new ArrayList<>();
NodeList cells = row.getElementsByTagNameNS(NS_TABLE, "table-cell");
for (int j = 0; j < cells.getLength() && rowData.size() < MAX_COLS; j++) {
var cell = (Element) cells.item(j);
// Read the display text (first <text:p>)
String value = "";
NodeList textNodes = cell.getElementsByTagNameNS(NS_TEXT, "p");
if (textNodes.getLength() > 0) {
value = textNodes.item(0).getTextContent().trim();
}
// Expand number-columns-repeated (capped at MAX_COLS)
String repeatAttr = cell.getAttributeNS(NS_TABLE, "number-columns-repeated");
int repeat = repeatAttr.isEmpty() ? 1 : Integer.parseInt(repeatAttr);
repeat = Math.min(repeat, MAX_COLS - rowData.size());
for (int r = 0; r < repeat; r++) {
rowData.add(value);
}
}
result.add(rowData);
}
}
return result;
}
/** Reads an XLSX/XLS file using Apache POI. Converts all cells to strings. */
private List<List<String>> readXlsx(File file) throws Exception {
List<List<String>> result = new ArrayList<>();
try (FileInputStream fis = new FileInputStream(file);
Workbook workbook = WorkbookFactory.create(fis)) {
Sheet sheet = workbook.getSheetAt(0);
for (int i = 0; i <= sheet.getLastRowNum(); i++) {
Row row = sheet.getRow(i);
List<String> rowData = new ArrayList<>();
if (row != null) {
for (int j = 0; j < MAX_COLS; j++) {
rowData.add(xlsxCellToString(row.getCell(j)));
}
}
result.add(rowData);
}
}
return result;
}
private String xlsxCellToString(Cell cell) {
if (cell == null) return "";
return switch (cell.getCellType()) {
case STRING -> cell.getStringCellValue();
case NUMERIC -> {
if (DateUtil.isCellDateFormatted(cell)) {
yield cell.getLocalDateTimeCellValue().toLocalDate().toString(); // ISO
}
yield String.valueOf((int) cell.getNumericCellValue());
}
case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
default -> "";
};
}
// --- Import logic (works on neutral List<String> rows) ---
private ProcessResult processRows(List<List<String>> rows) {
int processed = 0;
List<SkippedFile> skippedFiles = new ArrayList<>();
for (int i = 1; i < rows.size(); i++) { // skip header row
List<String> cells = rows.get(i);
String index = getCell(cells, colIndex);
if (index.isBlank()) continue;
String filename = index.contains(".") ? index : index + ".pdf";
if (!isValidImportFilename(filename)) {
log.warn("Skipping import row {}: filename rejected — {}", i, filename);
skippedFiles.add(new SkippedFile(filename, SkipReason.INVALID_FILENAME_PATH_TRAVERSAL));
continue;
}
Optional<File> fileOnDisk = findFileRecursive(filename);
if (fileOnDisk.isEmpty()) {
log.warn("Datei nicht gefunden, importiere nur Metadaten: {}", filename);
}
if (fileOnDisk.isPresent()) {
try {
if (!isPdfMagicBytes(fileOnDisk.get())) {
log.warn("Überspringe {}: Datei beginnt nicht mit %PDF-Signatur", filename);
skippedFiles.add(new SkippedFile(filename, SkipReason.INVALID_PDF_SIGNATURE));
continue;
}
} catch (IOException e) {
log.error("Fehler beim Prüfen der Magic-Bytes für {}", filename, e);
skippedFiles.add(new SkippedFile(filename, SkipReason.FILE_READ_ERROR));
continue;
}
}
Optional<SkipReason> skipReason = importSingleDocument(cells, fileOnDisk, filename, index);
if (skipReason.isPresent()) {
skippedFiles.add(new SkippedFile(filename, skipReason.get()));
} else {
processed++;
}
}
return new ProcessResult(processed, skippedFiles);
}
private boolean isValidImportFilename(String filename) {
if (filename == null || filename.isBlank()) return false;
if (filename.contains("/")) return false;
if (filename.contains("\\")) return false;
if (filename.contains("")) return false; // U+2215 DIVISION SLASH
if (filename.contains("")) return false; // U+FF0F FULLWIDTH SOLIDUS
if (filename.contains("")) return false; // U+29F5 REVERSE SOLIDUS OPERATOR
if (filename.contains("..")) return false;
if (filename.equals(".")) return false;
if (filename.contains("\0")) return false;
// Paths.get() is safe here on Linux for all inputs that passed the checks above;
// it may throw InvalidPathException for OS-specific illegal chars on Windows,
// but those are not reachable in production.
if (Paths.get(filename).isAbsolute()) return false;
return true;
}
// package-private: Mockito spy in tests can override to inject IOException
InputStream openFileStream(File file) throws IOException {
return new FileInputStream(file);
}
private boolean isPdfMagicBytes(File file) throws IOException {
try (InputStream is = openFileStream(file)) {
byte[] header = is.readNBytes(4);
return header.length == 4
&& header[0] == 0x25 // %
&& header[1] == 0x50 // P
&& header[2] == 0x44 // D
&& header[3] == 0x46; // F
}
}
/**
* Imports a single document row.
*
* @return empty Optional on success; an Optional containing the skip reason on failure/skip.
*/
@Transactional
protected Optional<SkipReason> importSingleDocument(List<String> cells, Optional<File> file, String originalFilename, String index) {
Optional<Document> existing = documentService.findByOriginalFilename(originalFilename);
if (existing.isPresent() && existing.get().getStatus() != DocumentStatus.PLACEHOLDER) {
log.info("Dokument {} existiert bereits, überspringe.", originalFilename);
return Optional.of(SkipReason.ALREADY_EXISTS);
}
String archiveBox = getCell(cells, colBox);
String archiveFolder = getCell(cells, colFolder);
String senderRaw = getCell(cells, colSender);
String receiversRaw = getCell(cells, colReceivers);
LocalDate date = parseDate(getCell(cells, colDate));
String location = getCell(cells, colLocation);
String tagRaw = getCell(cells, colTags);
String summary = getCell(cells, colSummary);
String transcription = getCell(cells, colTranscription);
String s3Key = null;
String contentType = null;
DocumentStatus status = DocumentStatus.PLACEHOLDER;
if (file.isPresent()) {
try {
contentType = Files.probeContentType(file.get().toPath());
} catch (IOException e) {
contentType = null;
}
if (contentType == null) contentType = "application/octet-stream";
s3Key = "documents/" + UUID.randomUUID() + "_" + file.get().getName();
try {
s3Client.putObject(PutObjectRequest.builder()
.bucket(bucketName)
.key(s3Key)
.contentType(contentType)
.build(),
RequestBody.fromFile(file.get()));
status = DocumentStatus.UPLOADED;
} catch (Exception e) {
log.error("S3 Upload Fehler für {}", file.get().getName(), e);
return Optional.of(SkipReason.S3_UPLOAD_FAILED);
}
}
Person sender = senderRaw.isBlank() ? null : findOrCreatePerson(senderRaw);
List<Person> receivers = PersonNameParser.parseReceivers(receiversRaw).stream()
.map(this::findOrCreatePerson)
.filter(Objects::nonNull)
.toList();
Tag tag = null;
if (!tagRaw.isBlank()) {
tag = tagService.findOrCreate(tagRaw);
}
Document doc = existing.orElse(Document.builder()
.originalFilename(originalFilename)
.build());
// Heuristic: mark as complete if at least one key field is present in the spreadsheet row
boolean metadataComplete = date != null || !senderRaw.isBlank() || !receiversRaw.isBlank();
doc.setTitle(buildTitle(index, date, location));
doc.setFilePath(s3Key);
doc.setContentType(contentType);
doc.setStatus(status);
doc.setArchiveBox(archiveBox.isBlank() ? null : archiveBox);
doc.setArchiveFolder(archiveFolder.isBlank() ? null : archiveFolder);
doc.setDocumentDate(date);
doc.setLocation(location.isBlank() ? null : location);
doc.setSummary(summary.isBlank() ? null : summary);
doc.setTranscription(transcription.isBlank() ? null : transcription);
doc.setSender(sender);
doc.getReceivers().addAll(receivers);
if (tag != null) doc.getTags().add(tag);
doc.setMetadataComplete(metadataComplete);
Document saved = documentService.save(doc);
if (file.isPresent()) {
thumbnailAsyncRunner.dispatchAfterCommit(saved.getId());
}
log.info("Importiert{}: {}", file.isEmpty() ? " (nur Metadaten)" : "", originalFilename);
return Optional.empty();
}
// --- Helpers ---
private String getCell(List<String> cells, int col) {
if (col >= cells.size()) return "";
String val = cells.get(col);
return val == null ? "" : val.trim();
}
private LocalDate parseDate(String value) {
if (value == null || value.isBlank()) return null;
try {
return LocalDate.parse(value.trim());
} catch (DateTimeParseException e) {
return null;
}
}
private String buildTitle(String index, LocalDate date, String location) {
StringBuilder sb = new StringBuilder(index);
if (date != null) {
sb.append(" \u2013 ").append(date.format(GERMAN_DATE));
}
if (location != null && !location.isBlank()) {
sb.append(" \u2013 ").append(location);
}
return sb.toString();
}
private Person findOrCreatePerson(String rawName) {
return personService.findOrCreateByAlias(rawName);
}
private Optional<File> findFileRecursive(String filename) {
File baseDir = new File(importDir);
try (Stream<Path> walk = Files.walk(baseDir.toPath())) {
Optional<Path> match = walk.filter(p -> !Files.isDirectory(p))
.filter(p -> p.getFileName().toString().equals(filename))
.findFirst();
if (match.isEmpty()) return Optional.empty();
File candidate = match.get().toFile();
String baseDirCanonical = baseDir.getCanonicalPath();
if (!candidate.getCanonicalPath().startsWith(baseDirCanonical + File.separator)) {
throw DomainException.internal(ErrorCode.INTERNAL_ERROR, "Path escape detected: " + candidate);
}
return Optional.of(candidate);
} catch (IOException e) {
return Optional.empty();
}
}
}

View File

@@ -0,0 +1,69 @@
package org.raddatz.familienarchiv.importing;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.raddatz.familienarchiv.person.PersonService;
import org.raddatz.familienarchiv.person.PersonType;
import org.raddatz.familienarchiv.person.PersonUpsertCommand;
import org.springframework.stereotype.Component;
import java.io.File;
import java.time.LocalDate;
import java.time.format.DateTimeParseException;
import java.util.List;
/**
* Loads {@code canonical-persons.xlsx} (the register) into the person domain via
* {@link PersonService}, upserting each person by the normalizer {@code person_id}
* (source_ref). Register persons are confident identities, so {@code provisional} is
* driven by the sheet's already-clean value (normally {@code False}).
*/
@Component
@RequiredArgsConstructor
@Slf4j
public class PersonRegisterImporter {
static final List<String> REQUIRED_HEADERS = List.of("person_id", "last_name", "first_name", "provisional");
private final PersonService personService;
public int load(File artifact) {
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(artifact, REQUIRED_HEADERS);
int processed = 0;
for (CanonicalSheetReader.Row row : rows) {
String personId = row.get("person_id");
if (personId.isBlank()) continue;
personService.upsertBySourceRef(toCommand(row, personId));
processed++;
}
log.info("Imported {} register persons from {}", processed, artifact.getName());
return processed;
}
private PersonUpsertCommand toCommand(CanonicalSheetReader.Row row, String personId) {
return PersonUpsertCommand.builder()
.sourceRef(personId)
.lastName(blankToNull(row.get("last_name")))
.firstName(blankToNull(row.get("first_name")))
.maidenName(blankToNull(row.get("maiden_name")))
.notes(blankToNull(row.get("notes")))
.birthYear(yearOf(row.get("birth_date")))
.deathYear(yearOf(row.get("death_date")))
.personType(PersonType.PERSON)
.provisional(Boolean.parseBoolean(row.get("provisional")))
.build();
}
private static Integer yearOf(String isoDate) {
if (isoDate == null || isoDate.isBlank()) return null;
try {
return LocalDate.parse(isoDate.trim()).getYear();
} catch (DateTimeParseException e) {
return null;
}
}
private static String blankToNull(String s) {
return (s == null || s.isBlank()) ? null : s;
}
}

View File

@@ -0,0 +1,135 @@
package org.raddatz.familienarchiv.importing;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.raddatz.familienarchiv.exception.DomainException;
import org.raddatz.familienarchiv.exception.ErrorCode;
import org.raddatz.familienarchiv.person.Person;
import org.raddatz.familienarchiv.person.PersonService;
import org.raddatz.familienarchiv.person.PersonType;
import org.raddatz.familienarchiv.person.PersonUpsertCommand;
import org.raddatz.familienarchiv.person.relationship.RelationType;
import org.raddatz.familienarchiv.person.relationship.RelationshipService;
import org.raddatz.familienarchiv.person.relationship.dto.CreateRelationshipRequest;
import org.springframework.stereotype.Component;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
/**
* Loads {@code canonical-persons-tree.json} into the person + relationship domains.
* Tree persons are upserted via {@link PersonService} keyed on the shared
* {@code personId} slug (which Phase 1 #670 now emits into the tree), so they reconcile
* with the register rather than duplicating it. Relationships reference persons by the
* tree's local {@code rowId}; each side is mapped to the upserted person's UUID and
* created through {@link RelationshipService} (never the relationship repository —
* layering rule). A duplicate relationship on re-import is swallowed for idempotency.
*/
@Component
@RequiredArgsConstructor
@Slf4j
public class PersonTreeImporter {
// The tree JSON is a local implementation detail, not a shared API payload, so the
// importer owns its own mapper rather than depending on the web ObjectMapper bean.
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private final PersonService personService;
private final RelationshipService relationshipService;
public int load(File artifact) {
JsonNode root = readTree(artifact);
Map<String, UUID> idByRowId = upsertPersons(root.path("persons"));
int relationships = createRelationships(root.path("relationships"), idByRowId);
log.info("Imported {} tree persons and {} relationships from {}",
idByRowId.size(), relationships, artifact.getName());
return idByRowId.size();
}
private JsonNode readTree(File artifact) {
try {
return OBJECT_MAPPER.readTree(artifact);
} catch (Exception e) {
throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
"Unreadable canonical artifact: " + artifact.getName());
}
}
private Map<String, UUID> upsertPersons(JsonNode persons) {
Map<String, UUID> idByRowId = new HashMap<>();
for (JsonNode node : persons) {
String personId = text(node, "personId");
if (personId.isBlank()) continue;
Person person = personService.upsertBySourceRef(toCommand(node, personId));
idByRowId.put(text(node, "rowId"), person.getId());
}
return idByRowId;
}
private PersonUpsertCommand toCommand(JsonNode node, String personId) {
return PersonUpsertCommand.builder()
.sourceRef(personId)
.lastName(blankToNull(text(node, "lastName")))
.firstName(blankToNull(text(node, "firstName")))
.maidenName(blankToNull(text(node, "maidenName")))
.notes(blankToNull(text(node, "notes")))
.birthYear(intOrNull(node, "birthYear"))
.deathYear(intOrNull(node, "deathYear"))
.familyMember(node.path("familyMember").asBoolean(false))
.personType(PersonType.PERSON)
.provisional(false)
.build();
}
private int createRelationships(JsonNode relationships, Map<String, UUID> idByRowId) {
int created = 0;
for (JsonNode node : relationships) {
// Trap: a relationship node's personId / relatedPersonId fields carry the tree's
// local rowId (e.g. "row_a"), NOT a person slug. They are resolved through
// idByRowId to the upserted person's UUID.
UUID person = idByRowId.get(text(node, "personId"));
UUID related = idByRowId.get(text(node, "relatedPersonId"));
if (person == null || related == null) {
log.warn("Skipping tree relationship with unresolved rowId: {} -> {}",
text(node, "personId"), text(node, "relatedPersonId"));
continue;
}
if (addRelationshipIdempotently(person, related, text(node, "type"))) {
created++;
}
}
return created;
}
private boolean addRelationshipIdempotently(UUID person, UUID related, String type) {
try {
relationshipService.addRelationship(person,
new CreateRelationshipRequest(related, RelationType.valueOf(type), null, null, null));
return true;
} catch (DomainException e) {
if (e.getCode() == ErrorCode.DUPLICATE_RELATIONSHIP
|| e.getCode() == ErrorCode.CIRCULAR_RELATIONSHIP) {
return false;
}
throw e;
}
}
private static String text(JsonNode node, String field) {
JsonNode value = node.get(field);
return value == null || value.isNull() ? "" : value.asText();
}
private static Integer intOrNull(JsonNode node, String field) {
JsonNode value = node.get(field);
return value == null || value.isNull() ? null : value.asInt();
}
private static String blankToNull(String s) {
return (s == null || s.isBlank()) ? null : s;
}
}

View File

@@ -0,0 +1,54 @@
package org.raddatz.familienarchiv.importing;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.raddatz.familienarchiv.tag.Tag;
import org.raddatz.familienarchiv.tag.TagService;
import org.springframework.stereotype.Component;
import java.io.File;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
/**
* Loads {@code canonical-tag-tree.xlsx} into the tag domain via {@link TagService},
* upserting each tag by its canonical {@code tag_path} (the source_ref). Parent links are
* resolved by the parent's path, which is the child path with its last {@code /segment}
* stripped. Rows are emitted parents-first by the normalizer, so a parent is always
* resolved before any child references it.
*/
@Component
@RequiredArgsConstructor
@Slf4j
public class TagTreeImporter {
static final List<String> REQUIRED_HEADERS = List.of("tag_path", "parent_name", "tag_name");
private static final String PATH_SEPARATOR = "/";
private final TagService tagService;
public int load(File artifact) {
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(artifact, REQUIRED_HEADERS);
Map<String, UUID> idByPath = new HashMap<>();
int processed = 0;
for (CanonicalSheetReader.Row row : rows) {
String path = row.get("tag_path");
if (path.isBlank()) continue;
UUID parentId = resolveParentId(path, idByPath);
Tag tag = tagService.upsertBySourceRef(path, row.get("tag_name"), parentId);
idByPath.put(path, tag.getId());
processed++;
}
log.info("Imported {} tags from {}", processed, artifact.getName());
return processed;
}
private UUID resolveParentId(String path, Map<String, UUID> idByPath) {
int lastSeparator = path.lastIndexOf(PATH_SEPARATOR);
if (lastSeparator < 0) return null;
String parentPath = path.substring(0, lastSeparator);
return idByPath.get(parentPath);
}
}

View File

@@ -1,20 +0,0 @@
package org.raddatz.familienarchiv.importing;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
class XxeSafeXmlParser {
private XxeSafeXmlParser() {}
static DocumentBuilderFactory hardenedFactory() throws ParserConfigurationException {
var factory = DocumentBuilderFactory.newInstance();
factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
factory.setXIncludeAware(false);
factory.setExpandEntityReferences(false);
return factory;
}
}

View File

@@ -32,6 +32,9 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
// Lookup by full alias string, used during ODS mass import // Lookup by full alias string, used during ODS mass import
Optional<Person> findByAliasIgnoreCase(String alias); Optional<Person> findByAliasIgnoreCase(String alias);
// Lookup by the normalizer person_id, used for idempotent canonical re-import (Phase 3).
Optional<Person> findBySourceRef(String sourceRef);
// Exact first+last name match, used for filename-based sender lookup // Exact first+last name match, used for filename-based sender lookup
Optional<Person> findByFirstNameIgnoreCaseAndLastNameIgnoreCase(String firstName, String lastName); Optional<Person> findByFirstNameIgnoreCaseAndLastNameIgnoreCase(String firstName, String lastName);

View File

@@ -80,6 +80,11 @@ public class PersonService {
return personRepository.findByFirstNameIgnoreCaseAndLastNameIgnoreCase(firstName, lastName); return personRepository.findByFirstNameIgnoreCaseAndLastNameIgnoreCase(firstName, lastName);
} }
/** Lookup by the normalizer person_id — used by the canonical importer for register-first matching. */
public Optional<Person> findBySourceRef(String sourceRef) {
return personRepository.findBySourceRef(sourceRef);
}
@Nullable @Nullable
@Transactional @Transactional
public Person findOrCreateByAlias(String rawName) { public Person findOrCreateByAlias(String rawName) {
@@ -115,6 +120,80 @@ public class PersonService {
}); });
} }
/**
* Idempotent upsert keyed on {@code sourceRef} (the normalizer person_id) for the
* canonical importer (Phase 3, ADR-025). On first import the canonical fields are
* written verbatim. On re-import the human-edit-preserve precedence applies:
* a non-blank existing field is never overwritten, and {@code provisional} never
* flips back to true once a human has confirmed the person.
*/
@Transactional
public Person upsertBySourceRef(PersonUpsertCommand cmd) {
return personRepository.findBySourceRef(cmd.sourceRef())
.map(existing -> personRepository.save(mergeCanonical(existing, cmd)))
.orElseGet(() -> fromCanonical(cmd));
}
private Person fromCanonical(PersonUpsertCommand cmd) {
Person person = personRepository.save(Person.builder()
.sourceRef(cmd.sourceRef())
.firstName(blankToNull(cmd.firstName()))
.lastName(cmd.lastName())
.notes(blankToNull(cmd.notes()))
.birthYear(cmd.birthYear())
.deathYear(cmd.deathYear())
.familyMember(cmd.familyMember())
.personType(cmd.personType() == null ? PersonType.PERSON : cmd.personType())
.provisional(cmd.provisional())
.build());
String maiden = blankToNull(cmd.maidenName());
if (maiden != null) {
int nextSortOrder = aliasRepository.findMaxSortOrder(person.getId()) + 1;
aliasRepository.save(PersonNameAlias.builder()
.person(person)
.lastName(maiden)
.type(PersonNameAliasType.MAIDEN_NAME)
.sortOrder(nextSortOrder)
.build());
}
return person;
}
private Person mergeCanonical(Person existing, PersonUpsertCommand cmd) {
existing.setFirstName(preferHuman(existing.getFirstName(), cmd.firstName()));
existing.setLastName(preferHuman(existing.getLastName(), cmd.lastName()));
existing.setNotes(preferHuman(existing.getNotes(), cmd.notes()));
existing.setBirthYear(preferHuman(existing.getBirthYear(), cmd.birthYear()));
existing.setDeathYear(preferHuman(existing.getDeathYear(), cmd.deathYear()));
if (cmd.personType() != null && existing.getPersonType() == PersonType.PERSON) {
existing.setPersonType(cmd.personType());
}
// provisional is monotonic-downward: once it is false it never reverts to true.
// This also pins the cross-loader precedence (ADR-025): a register/tree person is
// loaded before documents and already false, so a later document row that references
// the same source_ref (provisional=true) can never flip it provisional — the guard
// below only fires while existing is still provisional. Order of document rows is
// therefore irrelevant.
if (existing.isProvisional()) {
existing.setProvisional(cmd.provisional());
}
return existing;
}
// preferHuman keeps an existing human-entered value and only falls back to the canonical
// value when the existing one is absent — the single idiom for every fill-blank field.
private static String preferHuman(String existing, String canonical) {
return (existing == null || existing.isBlank()) ? blankToNull(canonical) : existing;
}
private static Integer preferHuman(Integer existing, Integer canonical) {
return existing != null ? existing : canonical;
}
private static String blankToNull(String s) {
return (s == null || s.isBlank()) ? null : s.trim();
}
@Transactional @Transactional
public Person createPerson(String firstName, String lastName, String alias) { public Person createPerson(String firstName, String lastName, String alias) {
Person person = Person.builder() Person person = Person.builder()

View File

@@ -0,0 +1,24 @@
package org.raddatz.familienarchiv.person;
import lombok.Builder;
/**
* Importer → {@link PersonService} command for an idempotent upsert keyed on
* {@code sourceRef} (the normalizer's stable person_id). Carries only the canonical
* fields the importer owns; the service applies the human-edit-preserve precedence
* (see ADR-025): non-blank existing fields are never overwritten, and {@code provisional}
* never flips back to true once a human has confirmed a person.
*/
@Builder
public record PersonUpsertCommand(
String sourceRef,
String firstName,
String lastName,
String maidenName,
String notes,
Integer birthYear,
Integer deathYear,
boolean familyMember,
PersonType personType,
boolean provisional
) {}

View File

@@ -22,6 +22,9 @@ public interface TagRepository extends JpaRepository<Tag, UUID> {
Optional<Tag> findByNameIgnoreCase(String name); Optional<Tag> findByNameIgnoreCase(String name);
// Lookup by the canonical tag_path, used for idempotent canonical re-import (Phase 3).
Optional<Tag> findBySourceRef(String sourceRef);
List<Tag> findByNameContainingIgnoreCase(String name); List<Tag> findByNameContainingIgnoreCase(String name);
/** /**

View File

@@ -7,6 +7,7 @@ import java.util.HashSet;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Optional;
import java.util.Set; import java.util.Set;
import java.util.UUID; import java.util.UUID;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@@ -49,12 +50,37 @@ public class TagService {
.orElseThrow(() -> DomainException.notFound(ErrorCode.TAG_NOT_FOUND, "Tag not found: " + id)); .orElseThrow(() -> DomainException.notFound(ErrorCode.TAG_NOT_FOUND, "Tag not found: " + id));
} }
/** Lookup by the canonical tag_path — used by the canonical importer to attach a document's tag. */
public Optional<Tag> findBySourceRef(String sourceRef) {
return tagRepository.findBySourceRef(sourceRef);
}
public Tag findOrCreate(String name) { public Tag findOrCreate(String name) {
String cleanName = name.trim(); String cleanName = name.trim();
return tagRepository.findByNameIgnoreCase(cleanName) return tagRepository.findByNameIgnoreCase(cleanName)
.orElseGet(() -> tagRepository.save(Tag.builder().name(cleanName).build())); .orElseGet(() -> tagRepository.save(Tag.builder().name(cleanName).build()));
} }
/**
* Idempotent upsert keyed on {@code sourceRef} (the canonical tag_path) for the
* Phase-3 importer (ADR-025). On first import the canonical name and parent are
* written; on re-import a human-renamed tag name is preserved (the source_ref is the
* stable identity, the name is a human-editable label).
*/
@Transactional
public Tag upsertBySourceRef(String sourceRef, String name, UUID parentId) {
return tagRepository.findBySourceRef(sourceRef)
.map(existing -> {
existing.setParentId(parentId);
return tagRepository.save(existing);
})
.orElseGet(() -> tagRepository.save(Tag.builder()
.sourceRef(sourceRef)
.name(name)
.parentId(parentId)
.build()));
}
@Transactional @Transactional
public Tag update(UUID id, TagUpdateDTO dto) { public Tag update(UUID id, TagUpdateDTO dto) {
Tag tag = getById(id); Tag tag = getById(id);

View File

@@ -5,7 +5,8 @@ import org.raddatz.familienarchiv.security.Permission;
import org.raddatz.familienarchiv.security.RequirePermission; import org.raddatz.familienarchiv.security.RequirePermission;
import org.raddatz.familienarchiv.document.DocumentService; import org.raddatz.familienarchiv.document.DocumentService;
import org.raddatz.familienarchiv.document.DocumentVersionService; import org.raddatz.familienarchiv.document.DocumentVersionService;
import org.raddatz.familienarchiv.importing.MassImportService; import org.raddatz.familienarchiv.importing.CanonicalImportOrchestrator;
import org.raddatz.familienarchiv.importing.ImportStatus;
import org.raddatz.familienarchiv.document.ThumbnailBackfillService; import org.raddatz.familienarchiv.document.ThumbnailBackfillService;
import org.springframework.http.ResponseEntity; import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.GetMapping;
@@ -21,20 +22,20 @@ import lombok.RequiredArgsConstructor;
@RequiredArgsConstructor @RequiredArgsConstructor
public class AdminController { public class AdminController {
private final MassImportService massImportService; private final CanonicalImportOrchestrator importOrchestrator;
private final DocumentService documentService; private final DocumentService documentService;
private final DocumentVersionService documentVersionService; private final DocumentVersionService documentVersionService;
private final ThumbnailBackfillService thumbnailBackfillService; private final ThumbnailBackfillService thumbnailBackfillService;
@PostMapping("/trigger-import") @PostMapping("/trigger-import")
public ResponseEntity<MassImportService.ImportStatus> triggerMassImport() { public ResponseEntity<ImportStatus> triggerMassImport() {
massImportService.runImportAsync(); importOrchestrator.runImportAsync();
return ResponseEntity.accepted().body(massImportService.getStatus()); return ResponseEntity.accepted().body(importOrchestrator.getStatus());
} }
@GetMapping("/import-status") @GetMapping("/import-status")
public ResponseEntity<MassImportService.ImportStatus> importStatus() { public ResponseEntity<ImportStatus> importStatus() {
return ResponseEntity.ok(massImportService.getStatus()); return ResponseEntity.ok(importOrchestrator.getStatus());
} }
@PostMapping("/backfill-versions") @PostMapping("/backfill-versions")

View File

@@ -125,17 +125,10 @@ app:
password: ${APP_ADMIN_PASSWORD:admin123} password: ${APP_ADMIN_PASSWORD:admin123}
import: import:
col: # Directory holding the normalizer's committed canonical artifacts
index: 0 # (canonical-{documents,persons,tag-tree}.xlsx + canonical-persons-tree.json).
box: 1 # The loader maps columns by header name — no positional indices (see ADR-025).
folder: 2 dir: ${IMPORT_DIR:/import}
sender: 3
receivers: 5
date: 7
location: 9
tags: 10
summary: 11
transcription: 13
ocr: ocr:
sender-model: sender-model:

View File

@@ -0,0 +1,229 @@
package org.raddatz.familienarchiv.importing;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.raddatz.familienarchiv.PostgresContainerConfig;
import org.raddatz.familienarchiv.document.Document;
import org.raddatz.familienarchiv.document.DocumentRepository;
import org.raddatz.familienarchiv.document.DocumentStatus;
import org.raddatz.familienarchiv.person.Person;
import org.raddatz.familienarchiv.person.PersonRepository;
import org.raddatz.familienarchiv.tag.TagRepository;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.context.annotation.Import;
import org.springframework.test.context.ActiveProfiles;
import org.springframework.test.context.bean.override.mockito.MockitoBean;
import org.springframework.test.util.ReflectionTestUtils;
import software.amazon.awssdk.services.s3.S3Client;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Optional;
import static org.assertj.core.api.Assertions.assertThat;
/**
* Real Postgres (Testcontainers) integration test for the canonical importer. The
* {@code UNIQUE(source_ref)} constraint and the upsert-on-conflict behaviour only exist
* in real Postgres (never H2), so idempotency is verified here. S3 is mocked — the
* synthetic document rows carry no on-disk files, so every document is a PLACEHOLDER and
* no upload is attempted.
*/
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
@ActiveProfiles("test")
@Import(PostgresContainerConfig.class)
class CanonicalImportIntegrationTest {
@MockitoBean S3Client s3Client;
@Autowired CanonicalImportOrchestrator orchestrator;
@Autowired PersonRepository personRepository;
@Autowired TagRepository tagRepository;
@Autowired DocumentRepository documentRepository;
Path artifactDir;
@BeforeEach
void setUp() throws Exception {
documentRepository.deleteAll();
personRepository.deleteAll();
tagRepository.deleteAll();
artifactDir = Files.createTempDirectory("canonical-import-it");
writeArtifacts(artifactDir);
ReflectionTestUtils.setField(orchestrator, "canonicalDir", artifactDir.toString());
}
/**
* The import commits through its own transactions (the orchestrator is not transactional),
* so this test cannot rely on {@code @Transactional} rollback for isolation. Delete the
* committed rows after each test — otherwise the last test's documents (dated 1888-02) and
* persons/tags leak into the shared Testcontainers Postgres and pollute other integration
* tests that assume a known seed (e.g. DocumentDensityIntegrationTest,
* DocumentSearchPagedIntegrationTest). Mirrors the @AfterEach deleteAll convention used by
* DocumentListItemIntegrationTest.
*/
@AfterEach
void cleanup() {
documentRepository.deleteAll();
personRepository.deleteAll();
tagRepository.deleteAll();
}
@Test
void reimport_isIdempotent_noDuplicatePersonsTagsOrDocuments() {
orchestrator.runImport();
long personsAfterFirst = personRepository.count();
long tagsAfterFirst = tagRepository.count();
long documentsAfterFirst = documentRepository.count();
assertThat(orchestrator.getStatus().state()).isEqualTo(ImportStatus.State.DONE);
assertThat(personsAfterFirst).isPositive();
assertThat(tagsAfterFirst).isPositive();
assertThat(documentsAfterFirst).isPositive();
orchestrator.runImport();
assertThat(personRepository.count()).isEqualTo(personsAfterFirst);
assertThat(tagRepository.count()).isEqualTo(tagsAfterFirst);
assertThat(documentRepository.count()).isEqualTo(documentsAfterFirst);
}
@Test
void reimport_preservesHumanEditedPersonField() {
orchestrator.runImport();
Person walter = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow();
walter.setNotes("Verified by archivist");
walter.setFirstName("Walther");
personRepository.save(walter);
orchestrator.runImport();
Person reimported = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow();
assertThat(reimported.getNotes()).isEqualTo("Verified by archivist");
assertThat(reimported.getFirstName()).isEqualTo("Walther");
}
@Test
void import_linksDocumentSenderToRegisterPerson_andRetainsRawText() {
orchestrator.runImport();
Person walter = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow();
Document doc = documentRepository.findByOriginalFilename("W-0001").orElseThrow();
assertThat(doc.getSender()).isNotNull();
assertThat(doc.getSender().getId()).isEqualTo(walter.getId());
assertThat(doc.getSenderText()).isEqualTo("Walter de Gruyter");
assertThat(doc.getStatus()).isEqualTo(DocumentStatus.PLACEHOLDER);
}
@Test
void import_provisionalFlag_trueForImporterCreated_falseForRegister() {
orchestrator.runImport();
Optional<Person> register = personRepository.findBySourceRef("de-gruyter-walter");
assertThat(register).get().extracting(Person::isProvisional).isEqualTo(false);
}
@Test
void reimport_prunesRemovedReceiverAndTag_whenCanonicalRowShrinks() throws Exception {
orchestrator.runImport();
// findById uses the Document.full entity graph so receivers/tags initialise eagerly.
Document before = documentRepository.findById(
documentRepository.findByOriginalFilename("W-0001").orElseThrow().getId()).orElseThrow();
assertThat(before.getReceivers()).isNotEmpty();
assertThat(before.getTags()).isNotEmpty();
// Re-stage the document sheet with W-0001's receiver and tag removed.
writeSheet(artifactDir.resolve("canonical-documents.xlsx"),
List.of("index", "file", "sender_person_id", "sender_name", "receiver_person_ids",
"receiver_names", "date_iso", "date_raw", "date_precision", "date_end", "location", "tags", "summary"),
List.of(
List.of("W-0001", "", "de-gruyter-walter", "Walter de Gruyter",
"", "", "1888-02-15", "15.2.1888", "DAY", "", "Rotterdam", "", "Geschäftsreise"),
List.of("W-0002", "", "de-gruyter-eugenie", "Eugenie de Gruyter",
"de-gruyter-walter", "Walter de Gruyter", "1888-02-16", "16.2.1888", "DAY", "",
"Middelburg", "Themen/Brautbriefe", "Reisepläne")));
orchestrator.runImport();
Document after = documentRepository.findById(before.getId()).orElseThrow();
assertThat(after.getReceivers()).isEmpty();
assertThat(after.getTags()).isEmpty();
}
@Test
void import_neverFlipsRegisterPersonToProvisional_whenReferencedByDocumentRow() {
// de-gruyter-walter is a register person (provisional=false) AND the sender of W-0001.
// The orchestrator loads the register before documents, so the document loader's
// register-first match links the existing person and never mints a provisional one.
// A second run (documents reference the same person again) must not flip it true.
orchestrator.runImport();
orchestrator.runImport();
Person walter = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow();
assertThat(walter.isProvisional()).isFalse();
Person eugenie = personRepository.findBySourceRef("de-gruyter-eugenie").orElseThrow();
assertThat(eugenie.isProvisional()).isFalse();
}
// ─── synthetic-but-real artifact set ─────────────────────────────────────────────
private void writeArtifacts(Path dir) throws Exception {
writeSheet(dir.resolve("canonical-tag-tree.xlsx"),
List.of("tag_path", "parent_name", "tag_name"),
List.of(
List.of("Themen", "", "Themen"),
List.of("Themen/Brautbriefe", "Themen", "Brautbriefe")));
writeSheet(dir.resolve("canonical-persons.xlsx"),
List.of("person_id", "last_name", "first_name", "maiden_name", "notes", "birth_date", "death_date", "provisional"),
List.of(
List.of("de-gruyter-walter", "de Gruyter", "Walter", "", "", "1865-01-01", "", "False"),
List.of("de-gruyter-eugenie", "de Gruyter", "Eugenie", "Wöhler", "", "", "", "False")));
Files.writeString(dir.resolve("canonical-persons-tree.json"), """
{"persons":[
{"rowId":"row_1","firstName":"Walter","lastName":"de Gruyter","familyMember":true,"personId":"de-gruyter-walter"},
{"rowId":"row_2","firstName":"Eugenie","lastName":"de Gruyter","maidenName":"Wöhler","familyMember":true,"personId":"de-gruyter-eugenie"}
],"relationships":[
{"personId":"row_1","relatedPersonId":"row_2","type":"SPOUSE_OF","source":"verheiratet_mit"}
]}
""");
writeSheet(dir.resolve("canonical-documents.xlsx"),
List.of("index", "file", "sender_person_id", "sender_name", "receiver_person_ids",
"receiver_names", "date_iso", "date_raw", "date_precision", "date_end", "location", "tags", "summary"),
List.of(
List.of("W-0001", "", "de-gruyter-walter", "Walter de Gruyter",
"de-gruyter-eugenie", "Eugenie de Gruyter", "1888-02-15", "15.2.1888", "DAY", "",
"Rotterdam", "Themen/Brautbriefe", "Geschäftsreise"),
List.of("W-0002", "", "de-gruyter-eugenie", "Eugenie de Gruyter",
"de-gruyter-walter", "Walter de Gruyter", "1888-02-16", "16.2.1888", "DAY", "",
"Middelburg", "Themen/Brautbriefe", "Reisepläne")));
}
private void writeSheet(Path file, List<String> headers, List<List<String>> rows) throws Exception {
try (XSSFWorkbook wb = new XSSFWorkbook()) {
Sheet sheet = wb.createSheet("Sheet1");
Row header = sheet.createRow(0);
for (int i = 0; i < headers.size(); i++) {
header.createCell(i).setCellValue(headers.get(i));
}
for (int r = 0; r < rows.size(); r++) {
Row row = sheet.createRow(r + 1);
List<String> values = rows.get(r);
for (int c = 0; c < values.size(); c++) {
row.createCell(c).setCellValue(values.get(c));
}
}
try (OutputStream out = Files.newOutputStream(file)) {
wb.write(out);
}
}
}
}

View File

@@ -0,0 +1,130 @@
package org.raddatz.familienarchiv.importing;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.io.TempDir;
import org.mockito.InOrder;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.raddatz.familienarchiv.exception.DomainException;
import org.springframework.test.util.ReflectionTestUtils;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.inOrder;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
@ExtendWith(MockitoExtension.class)
class CanonicalImportOrchestratorTest {
@Mock TagTreeImporter tagTreeImporter;
@Mock PersonRegisterImporter personRegisterImporter;
@Mock PersonTreeImporter personTreeImporter;
@Mock DocumentImporter documentImporter;
private CanonicalImportOrchestrator orchestrator(Path dir) {
CanonicalImportOrchestrator o = new CanonicalImportOrchestrator(
tagTreeImporter, personRegisterImporter, personTreeImporter, documentImporter);
ReflectionTestUtils.setField(o, "canonicalDir", dir.toString());
return o;
}
private void writeAllArtifacts(Path dir) throws Exception {
Files.writeString(dir.resolve("canonical-tag-tree.xlsx"), "x");
Files.writeString(dir.resolve("canonical-persons.xlsx"), "x");
Files.writeString(dir.resolve("canonical-persons-tree.json"), "x");
Files.writeString(dir.resolve("canonical-documents.xlsx"), "x");
}
@Test
void getStatus_isIdleByDefault(@TempDir Path dir) {
assertThat(orchestrator(dir).getStatus().state()).isEqualTo(ImportStatus.State.IDLE);
}
@Test
void runImport_loadsTagsAndPersonsBeforeDocuments(@TempDir Path dir) throws Exception {
writeAllArtifacts(dir);
when(documentImporter.load(any())).thenReturn(new DocumentImporter.LoadResult(0, List.of()));
CanonicalImportOrchestrator o = orchestrator(dir);
o.runImport();
InOrder order = inOrder(tagTreeImporter, personRegisterImporter, personTreeImporter, documentImporter);
order.verify(tagTreeImporter).load(any());
order.verify(personRegisterImporter).load(any());
order.verify(personTreeImporter).load(any());
order.verify(documentImporter).load(any());
}
@Test
void runImport_setsStatusDone_onSuccess(@TempDir Path dir) throws Exception {
writeAllArtifacts(dir);
when(documentImporter.load(any())).thenReturn(new DocumentImporter.LoadResult(3, List.of()));
CanonicalImportOrchestrator o = orchestrator(dir);
o.runImport();
assertThat(o.getStatus().state()).isEqualTo(ImportStatus.State.DONE);
assertThat(o.getStatus().processed()).isEqualTo(3);
}
@Test
void runImport_failsClosed_whenAnArtifactIsMissing(@TempDir Path dir) throws Exception {
Files.writeString(dir.resolve("canonical-tag-tree.xlsx"), "x");
// the other three artifacts are absent
CanonicalImportOrchestrator o = orchestrator(dir);
o.runImport();
assertThat(o.getStatus().state()).isEqualTo(ImportStatus.State.FAILED);
verify(tagTreeImporter, never()).load(any());
verify(documentImporter, never()).load(any());
}
@Test
void runImport_setsStatusFailed_whenLoaderThrows(@TempDir Path dir) throws Exception {
writeAllArtifacts(dir);
when(tagTreeImporter.load(any())).thenThrow(DomainException.badRequest(
org.raddatz.familienarchiv.exception.ErrorCode.IMPORT_ARTIFACT_INVALID, "bad"));
CanonicalImportOrchestrator o = orchestrator(dir);
o.runImport();
assertThat(o.getStatus().state()).isEqualTo(ImportStatus.State.FAILED);
verify(documentImporter, never()).load(any());
}
@Test
void runImportAsync_throwsConflict_whenAlreadyRunning(@TempDir Path dir) {
CanonicalImportOrchestrator o = orchestrator(dir);
ReflectionTestUtils.setField(o, "currentStatus", new ImportStatus(
ImportStatus.State.RUNNING, "IMPORT_RUNNING", "running", 0, List.of(), null));
assertThatThrownBy(o::runImportAsync)
.isInstanceOf(DomainException.class)
.hasMessageContaining("already in progress");
}
@Test
void runImport_aggregatesDocumentSkips(@TempDir Path dir) throws Exception {
writeAllArtifacts(dir);
when(documentImporter.load(any())).thenReturn(new DocumentImporter.LoadResult(1,
List.of(new ImportStatus.SkippedFile("fake.pdf", ImportStatus.SkipReason.INVALID_PDF_SIGNATURE))));
CanonicalImportOrchestrator o = orchestrator(dir);
o.runImport();
assertThat(o.getStatus().skipped()).isEqualTo(1);
assertThat(o.getStatus().skippedFiles())
.extracting(ImportStatus.SkippedFile::filename)
.containsExactly("fake.pdf");
}
}

View File

@@ -0,0 +1,115 @@
package org.raddatz.familienarchiv.importing;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.raddatz.familienarchiv.exception.DomainException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
class CanonicalSheetReaderTest {
@Test
void readRows_mapsCellsByHeaderName(@TempDir Path tempDir) throws Exception {
Path xlsx = write(tempDir, List.of("index", "file"), List.of(List.of("W-0001", "scan.pdf")));
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file"));
assertThat(rows).hasSize(1);
assertThat(rows.get(0).get("index")).isEqualTo("W-0001");
assertThat(rows.get(0).get("file")).isEqualTo("scan.pdf");
}
@Test
void readRows_throwsBadRequest_whenRequiredHeaderMissing(@TempDir Path tempDir) throws Exception {
Path xlsx = write(tempDir, List.of("index"), List.of(List.of("W-0001")));
assertThatThrownBy(() -> CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file")))
.isInstanceOf(DomainException.class)
.hasMessageContaining("file");
}
@Test
void get_returnsEmptyString_forBlankCell(@TempDir Path tempDir) throws Exception {
Path xlsx = write(tempDir, List.of("index", "file"), List.of(List.of("W-0001", "")));
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file"));
assertThat(rows.get(0).get("file")).isEmpty();
}
@Test
void get_returnsEmptyString_forUnknownColumn(@TempDir Path tempDir) throws Exception {
Path xlsx = write(tempDir, List.of("index"), List.of(List.of("W-0001")));
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index"));
assertThat(rows.get(0).get("does_not_exist")).isEmpty();
}
@Test
void get_returnsEmptyString_forTrailingColumns_whenRowShorterThanHeader(@TempDir Path tempDir) throws Exception {
// POI omits trailing empty cells, so a real-world artifact row can be narrower than
// the header. The missing columns must read as "" rather than throwing.
Path xlsx = write(tempDir,
List.of("index", "file", "summary"),
List.of(List.of("W-0001")));
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file", "summary"));
assertThat(rows.get(0).get("index")).isEqualTo("W-0001");
assertThat(rows.get(0).get("file")).isEmpty();
assertThat(rows.get(0).get("summary")).isEmpty();
}
@Test
void splitList_splitsOnPipe() {
assertThat(CanonicalSheetReader.splitList("a|b|c")).containsExactly("a", "b", "c");
}
@Test
void splitList_returnsEmptyList_forBlank() {
assertThat(CanonicalSheetReader.splitList("")).isEmpty();
assertThat(CanonicalSheetReader.splitList(" ")).isEmpty();
}
@Test
void splitList_returnsSingleElement_whenNoPipe() {
assertThat(CanonicalSheetReader.splitList("solo")).containsExactly("solo");
}
@Test
void splitList_trimsAndDropsEmptySegments() {
assertThat(CanonicalSheetReader.splitList("a| |b")).containsExactly("a", "b");
}
private Path write(Path dir, List<String> headers, List<List<String>> dataRows) throws Exception {
Path xlsx = dir.resolve("sheet.xlsx");
try (XSSFWorkbook wb = new XSSFWorkbook()) {
Sheet sheet = wb.createSheet("Sheet1");
Row header = sheet.createRow(0);
for (int i = 0; i < headers.size(); i++) {
header.createCell(i).setCellValue(headers.get(i));
}
for (int r = 0; r < dataRows.size(); r++) {
Row row = sheet.createRow(r + 1);
List<String> values = dataRows.get(r);
for (int c = 0; c < values.size(); c++) {
row.createCell(c).setCellValue(values.get(c));
}
}
try (OutputStream out = Files.newOutputStream(xlsx)) {
wb.write(out);
}
}
return xlsx;
}
}

View File

@@ -0,0 +1,459 @@
package org.raddatz.familienarchiv.importing;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.io.TempDir;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.raddatz.familienarchiv.document.Document;
import org.raddatz.familienarchiv.document.DocumentService;
import org.raddatz.familienarchiv.document.DocumentStatus;
import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
import org.raddatz.familienarchiv.person.Person;
import org.raddatz.familienarchiv.person.PersonService;
import org.raddatz.familienarchiv.person.PersonUpsertCommand;
import org.raddatz.familienarchiv.tag.Tag;
import org.raddatz.familienarchiv.tag.TagService;
import org.springframework.test.util.ReflectionTestUtils;
import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import java.io.File;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.LocalDate;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.lenient;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
@ExtendWith(MockitoExtension.class)
class DocumentImporterTest {
@Mock DocumentService documentService;
@Mock PersonService personService;
@Mock TagService tagService;
@Mock S3Client s3Client;
@Mock ThumbnailAsyncRunner thumbnailAsyncRunner;
DocumentImporter importer;
@BeforeEach
void setUp() {
importer = new DocumentImporter(documentService, personService, tagService, s3Client, thumbnailAsyncRunner);
ReflectionTestUtils.setField(importer, "bucketName", "test-bucket");
}
// ─── security regression — ported from MassImportServiceTest — do not remove ─────
@Test
void isValidImportFilename_returnsFalse_whenNull() {
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", (String) null)).isFalse();
}
@Test
void isValidImportFilename_returnsFalse_whenBlank() {
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", " ")).isFalse();
}
@Test
void isValidImportFilename_returnsFalse_whenForwardSlash() {
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "etc/passwd")).isFalse();
}
@Test
void isValidImportFilename_returnsFalse_whenBackslash() {
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "..\\etc\\passwd")).isFalse();
}
@Test
void isValidImportFilename_returnsFalse_whenDotDot() {
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "doc..evil.pdf")).isFalse();
}
@Test
void isValidImportFilename_returnsFalse_whenIsDotDot() {
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "..")).isFalse();
}
@Test
void isValidImportFilename_returnsFalse_whenAbsolutePath() {
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "/etc/passwd")).isFalse();
}
@Test
void isValidImportFilename_returnsFalse_whenNullByte() {
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "file\0.pdf")).isFalse();
}
@Test
void isValidImportFilename_returnsFalse_whenUnicodeDivisionSlash() {
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "foobar.pdf")).isFalse();
}
@Test
void isValidImportFilename_returnsFalse_whenFullwidthSlash() {
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "foobar.pdf")).isFalse();
}
@Test
void isValidImportFilename_returnsFalse_whenReverseSolidusOperator() {
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "foobar.pdf")).isFalse();
}
@Test
void isValidImportFilename_returnsTrue_whenPlainBasename() {
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "document.pdf")).isTrue();
}
@Test
void isValidImportFilename_returnsTrue_whenLeadingDot() {
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", ".hidden.pdf")).isTrue();
}
@Test
void isValidImportFilename_returnsTrue_whenHasSpaces() {
assertThat((Boolean) ReflectionTestUtils.invokeMethod(importer, "isValidImportFilename", "Brief an Oma.pdf")).isTrue();
}
@Test
void findFileRecursive_throwsDomainException_whenSymlinkEscapesImportDir(
@TempDir Path importDirPath, @TempDir Path outsideDir) throws Exception {
Path outsideFile = outsideDir.resolve("secret.pdf");
Files.writeString(outsideFile, "sensitive");
Files.createSymbolicLink(importDirPath.resolve("secret.pdf"), outsideFile);
ReflectionTestUtils.setField(importer, "importDir", importDirPath.toString());
org.assertj.core.api.Assertions.assertThatThrownBy(
() -> ReflectionTestUtils.invokeMethod(importer, "findFileRecursive", "secret.pdf"))
.isInstanceOf(org.raddatz.familienarchiv.exception.DomainException.class);
}
// ─── path traversal in the file column cannot escape importDir ───────────────────
@Test
void load_rejectsFileColumn_whenBasenameIsTraversalToken(@TempDir Path tempDir) throws Exception {
// A file column whose basename is itself a traversal token must be rejected
// outright, never used for disk I/O.
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
Path xlsx = writeDocs(tempDir, docRow("W-0001", "evil/..", "", "", "", "", "", "", "", ""));
DocumentImporter.LoadResult result = importer.load(xlsx.toFile());
assertThat(result.skippedFiles())
.extracting(ImportStatus.SkippedFile::reason)
.containsExactly(ImportStatus.SkipReason.INVALID_FILENAME_PATH_TRAVERSAL);
verify(documentService, never()).save(any());
}
@Test
void load_traversalFileColumn_cannotEscapeImportDir_yieldsPlaceholder(@TempDir Path tempDir) throws Exception {
// ../../etc/cron.d/x reduces to basename "x"; the disk lookup is confined to
// importDir, so no file is found, nothing is uploaded, and the row becomes a
// metadata-only PLACEHOLDER — the file outside importDir is never read.
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
when(documentService.findByOriginalFilename("W-0001")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
Path xlsx = writeDocs(tempDir, docRow("W-0001", "../../etc/cron.d/x", "", "", "", "", "", "", "", ""));
importer.load(xlsx.toFile());
verify(s3Client, never()).putObject(any(PutObjectRequest.class), any(RequestBody.class));
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d -> d.getStatus() == DocumentStatus.PLACEHOLDER));
}
// ─── PDF magic-byte guard — ported — do not remove ──────────────────────────────
@Test
void load_skipsFile_whenNotPdfMagicBytes(@TempDir Path tempDir) throws Exception {
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
Files.writeString(tempDir.resolve("W-0001.pdf"), "not a pdf");
lenient().when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
Path xlsx = writeDocs(tempDir, docRow("W-0001", "..\\__scan\\W-0001.pdf", "", "", "", "", "", "", "", ""));
DocumentImporter.LoadResult result = importer.load(xlsx.toFile());
assertThat(result.skippedFiles())
.extracting(ImportStatus.SkippedFile::reason)
.containsExactly(ImportStatus.SkipReason.INVALID_PDF_SIGNATURE);
verify(s3Client, never()).putObject(any(PutObjectRequest.class), any(RequestBody.class));
}
@Test
void load_skipsFile_whenMagicByteCheckThrowsIoException(@TempDir Path tempDir) throws Exception {
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
Files.writeString(tempDir.resolve("W-0001.pdf"), "content");
lenient().when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
Path xlsx = writeDocs(tempDir, docRow("W-0001", "..\\__scan\\W-0001.pdf", "", "", "", "", "", "", "", ""));
DocumentImporter spyImporter = org.mockito.Mockito.spy(importer);
org.mockito.Mockito.doThrow(new java.io.IOException("read error"))
.when(spyImporter).openFileStream(any(File.class));
DocumentImporter.LoadResult result = spyImporter.load(xlsx.toFile());
assertThat(result.skippedFiles())
.extracting(ImportStatus.SkippedFile::reason)
.containsExactly(ImportStatus.SkipReason.FILE_READ_ERROR);
}
@Test
void load_skipsAlreadyExists_whenDocumentUploadedNotPlaceholder(@TempDir Path tempDir) throws Exception {
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
Document existing = Document.builder().id(UUID.randomUUID())
.originalFilename("W-0001").status(DocumentStatus.UPLOADED).build();
when(documentService.findByOriginalFilename("W-0001")).thenReturn(Optional.of(existing));
Path xlsx = writeDocs(tempDir, docRow("W-0001", "", "", "", "", "", "", "", "", ""));
DocumentImporter.LoadResult result = importer.load(xlsx.toFile());
assertThat(result.skippedFiles())
.extracting(ImportStatus.SkippedFile::reason)
.containsExactly(ImportStatus.SkipReason.ALREADY_EXISTS);
verify(documentService, never()).save(any());
}
// ─── file column drives status: present → UPLOADED, empty → PLACEHOLDER ───────────
@Test
void load_uploadsToS3_andSetsStatusUploaded_whenFilePresent(@TempDir Path tempDir) throws Exception {
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
byte[] pdf = {0x25, 0x50, 0x44, 0x46, 0x2D};
Files.write(tempDir.resolve("W-0001.pdf"), pdf);
when(documentService.findByOriginalFilename("W-0001")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
Path xlsx = writeDocs(tempDir, docRow("W-0001", "..\\__scan\\W-0001.pdf", "", "", "", "", "", "", "", ""));
importer.load(xlsx.toFile());
verify(s3Client).putObject(any(PutObjectRequest.class), any(RequestBody.class));
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d -> d.getStatus() == DocumentStatus.UPLOADED));
}
@Test
void load_setsStatusPlaceholder_whenFileColumnEmpty(@TempDir Path tempDir) throws Exception {
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
when(documentService.findByOriginalFilename("W-0099")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
Path xlsx = writeDocs(tempDir, docRow("W-0099", "", "", "", "", "", "", "", "", ""));
importer.load(xlsx.toFile());
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d -> d.getStatus() == DocumentStatus.PLACEHOLDER));
verify(s3Client, never()).putObject(any(PutObjectRequest.class), any(RequestBody.class));
}
// ─── attribution routing — register-first + always retain raw ────────────────────
@Test
void load_linksRegisterSender_andRetainsRawSenderText(@TempDir Path tempDir) throws Exception {
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
Person walter = Person.builder().id(UUID.randomUUID()).sourceRef("de-gruyter-walter")
.firstName("Walter").lastName("de Gruyter").build();
when(documentService.findByOriginalFilename("W-0001")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
when(personService.findBySourceRef("de-gruyter-walter")).thenReturn(Optional.of(walter));
Path xlsx = writeDocs(tempDir, docRow("W-0001", "", "de-gruyter-walter", "Walter de Gruyter",
"", "", "", "", "", ""));
importer.load(xlsx.toFile());
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d ->
d.getSender() == walter && "Walter de Gruyter".equals(d.getSenderText())));
}
@Test
void load_createsProvisionalSender_whenSlugUnmatchedInRegister(@TempDir Path tempDir) throws Exception {
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
Person provisional = Person.builder().id(UUID.randomUUID()).sourceRef("schwester-hanni")
.lastName("Schwester Hanni").provisional(true).build();
when(documentService.findByOriginalFilename("W-0002")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
when(personService.findBySourceRef("schwester-hanni")).thenReturn(Optional.empty());
when(personService.upsertBySourceRef(any())).thenReturn(provisional);
Path xlsx = writeDocs(tempDir, docRow("W-0002", "", "schwester-hanni", "Schwester Hanni",
"", "", "", "", "", ""));
importer.load(xlsx.toFile());
org.mockito.ArgumentCaptor<PersonUpsertCommand> captor =
org.mockito.ArgumentCaptor.forClass(PersonUpsertCommand.class);
verify(personService).upsertBySourceRef(captor.capture());
assertThat(captor.getValue().provisional()).isTrue();
assertThat(captor.getValue().lastName()).isEqualTo("Schwester Hanni");
}
@Test
void load_createsNoSenderPerson_whenSlugEmptyButRawPresent(@TempDir Path tempDir) throws Exception {
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
when(documentService.findByOriginalFilename("W-0003")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
Path xlsx = writeDocs(tempDir, docRow("W-0003", "", "", "?",
"", "", "", "", "", ""));
importer.load(xlsx.toFile());
verify(personService, never()).findBySourceRef(any());
verify(personService, never()).upsertBySourceRef(any());
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d ->
d.getSender() == null && "?".equals(d.getSenderText())));
}
@Test
void load_splitsMultipleReceivers_andRetainsRawReceiverText(@TempDir Path tempDir) throws Exception {
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
Person herbert = Person.builder().id(UUID.randomUUID()).sourceRef("cram-herbert").lastName("Cram").build();
Person clara = Person.builder().id(UUID.randomUUID()).sourceRef("clara").lastName("Clara").build();
when(documentService.findByOriginalFilename("W-0004")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
when(personService.findBySourceRef("cram-herbert")).thenReturn(Optional.of(herbert));
when(personService.findBySourceRef("clara")).thenReturn(Optional.of(clara));
Path xlsx = writeDocs(tempDir, docRow("W-0004", "", "", "",
"cram-herbert|clara", "Herbert Cram|Clara", "", "", "", ""));
importer.load(xlsx.toFile());
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d ->
d.getReceivers().size() == 2
&& d.getReceivers().contains(herbert)
&& d.getReceivers().contains(clara)
&& "Herbert Cram|Clara".equals(d.getReceiverText())));
}
// ─── clean date values parse without semantic logic ──────────────────────────────
@Test
void load_parsesCleanDateAndPrecision(@TempDir Path tempDir) throws Exception {
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
when(documentService.findByOriginalFilename("W-0005")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
Path xlsx = writeDocs(tempDir, docRow("W-0005", "", "", "",
"", "", "1916-06-01", "1.6.1916", "MONTH", ""));
importer.load(xlsx.toFile());
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d ->
LocalDate.of(1916, 6, 1).equals(d.getDocumentDate())
&& d.getMetaDatePrecision() == org.raddatz.familienarchiv.document.DatePrecision.MONTH
&& "1.6.1916".equals(d.getMetaDateRaw())));
}
@Test
void load_attachesTagBySourceRef(@TempDir Path tempDir) throws Exception {
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
Tag tag = Tag.builder().id(UUID.randomUUID()).name("Brautbriefe").sourceRef("Themen/Brautbriefe").build();
when(documentService.findByOriginalFilename("W-0006")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
when(tagService.findBySourceRef("Themen/Brautbriefe")).thenReturn(Optional.of(tag));
Path xlsx = writeDocs(tempDir, docRowWithTag("W-0006", "Themen/Brautbriefe"));
importer.load(xlsx.toFile());
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d -> d.getTags().contains(tag)));
}
// ─── idempotency — update existing document in place by index ─────────────────────
@Test
void load_updatesExistingDocumentInPlace_whenIndexExists(@TempDir Path tempDir) throws Exception {
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
Document existing = Document.builder().id(UUID.randomUUID())
.originalFilename("W-0007").status(DocumentStatus.PLACEHOLDER).build();
when(documentService.findByOriginalFilename("W-0007")).thenReturn(Optional.of(existing));
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
Path xlsx = writeDocs(tempDir, docRow("W-0007", "", "", "", "", "", "", "", "", ""));
importer.load(xlsx.toFile());
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d -> d.getId().equals(existing.getId())));
}
// ─── canonical collections are authoritative — re-import prunes removed links ──────
@Test
void load_prunesReceiversAndTags_whenCanonicalRowShrinks(@TempDir Path tempDir) throws Exception {
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
Person staleReceiver = Person.builder().id(UUID.randomUUID()).sourceRef("stale-receiver").lastName("Stale").build();
Tag staleTag = Tag.builder().id(UUID.randomUUID()).name("Stale").sourceRef("Themen/Stale").build();
Document existing = Document.builder().id(UUID.randomUUID())
.originalFilename("W-0008").status(DocumentStatus.PLACEHOLDER).build();
existing.getReceivers().add(staleReceiver);
existing.getTags().add(staleTag);
when(documentService.findByOriginalFilename("W-0008")).thenReturn(Optional.of(existing));
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
// The canonical row now carries no receiver and no tag: both stale links must go.
Path xlsx = writeDocs(tempDir, docRow("W-0008", "", "", "", "", "", "", "", "", ""));
importer.load(xlsx.toFile());
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d ->
d.getReceivers().isEmpty() && d.getTags().isEmpty()));
}
// ─── helpers ─────────────────────────────────────────────────────────────────────
private Map<String, String> docRow(String index, String file, String senderId, String senderName,
String receiverIds, String receiverNames, String dateIso,
String dateRaw, String datePrecision, String dateEnd) {
Map<String, String> r = new LinkedHashMap<>();
r.put("index", index);
r.put("file", file);
r.put("sender_person_id", senderId);
r.put("sender_name", senderName);
r.put("receiver_person_ids", receiverIds);
r.put("receiver_names", receiverNames);
r.put("date_iso", dateIso);
r.put("date_raw", dateRaw);
r.put("date_precision", datePrecision);
r.put("date_end", dateEnd);
r.put("location", "");
r.put("tags", "");
r.put("summary", "");
return r;
}
private Map<String, String> docRowWithTag(String index, String tagPath) {
Map<String, String> r = docRow(index, "", "", "", "", "", "", "", "", "");
r.put("tags", tagPath);
return r;
}
@SafeVarargs
private Path writeDocs(Path dir, Map<String, String>... rows) throws Exception {
Path xlsx = dir.resolve("canonical-documents.xlsx");
List<String> headers = List.of("index", "file", "sender_person_id", "sender_name",
"receiver_person_ids", "receiver_names", "date_iso", "date_raw", "date_precision",
"date_end", "location", "tags", "summary");
try (XSSFWorkbook wb = new XSSFWorkbook()) {
Sheet sheet = wb.createSheet("Sheet1");
Row header = sheet.createRow(0);
for (int i = 0; i < headers.size(); i++) {
header.createCell(i).setCellValue(headers.get(i));
}
for (int r = 0; r < rows.length; r++) {
Row row = sheet.createRow(r + 1);
for (int c = 0; c < headers.size(); c++) {
row.createCell(c).setCellValue(rows[r].getOrDefault(headers.get(c), ""));
}
}
try (OutputStream out = Files.newOutputStream(xlsx)) {
wb.write(out);
}
}
return xlsx;
}
}

View File

@@ -1,896 +0,0 @@
package org.raddatz.familienarchiv.importing;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.io.TempDir;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.raddatz.familienarchiv.exception.DomainException;
import org.raddatz.familienarchiv.document.Document;
import org.raddatz.familienarchiv.document.DocumentService;
import org.raddatz.familienarchiv.document.DocumentStatus;
import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
import org.raddatz.familienarchiv.person.Person;
import org.raddatz.familienarchiv.tag.Tag;
import org.raddatz.familienarchiv.tag.TagService;
import org.raddatz.familienarchiv.person.PersonService;
import org.springframework.test.util.ReflectionTestUtils;
import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.xml.sax.SAXParseException;
import java.io.File;
import java.io.OutputStream;
import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.*;
@ExtendWith(MockitoExtension.class)
class MassImportServiceTest {
@Mock DocumentService documentService;
@Mock PersonService personService;
@Mock TagService tagService;
@Mock S3Client s3Client;
@Mock ThumbnailAsyncRunner thumbnailAsyncRunner;
MassImportService service;
@BeforeEach
void setUp() {
service = new MassImportService(documentService, personService, tagService, s3Client, thumbnailAsyncRunner);
ReflectionTestUtils.setField(service, "bucketName", "test-bucket");
ReflectionTestUtils.setField(service, "importDir", "/import");
ReflectionTestUtils.setField(service, "colIndex", 0);
ReflectionTestUtils.setField(service, "colBox", 1);
ReflectionTestUtils.setField(service, "colFolder", 2);
ReflectionTestUtils.setField(service, "colSender", 3);
ReflectionTestUtils.setField(service, "colReceivers", 5);
ReflectionTestUtils.setField(service, "colDate", 7);
ReflectionTestUtils.setField(service, "colLocation", 9);
ReflectionTestUtils.setField(service, "colTags", 10);
ReflectionTestUtils.setField(service, "colSummary", 11);
ReflectionTestUtils.setField(service, "colTranscription", 13);
}
// ─── getStatus ────────────────────────────────────────────────────────────
@Test
void getStatus_returnsIdleByDefault() {
assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.IDLE);
}
@Test
void getStatus_hasStatusCode_IMPORT_IDLE_byDefault() {
assertThat(service.getStatus().statusCode()).isEqualTo("IMPORT_IDLE");
}
// ─── runImportAsync ───────────────────────────────────────────────────────
@Test
void runImportAsync_setsFailedStatus_whenImportDirectoryDoesNotExist() {
// /import directory doesn't exist in test environment → IOException → IMPORT_FAILED_INTERNAL
service.runImportAsync();
assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.FAILED);
assertThat(service.getStatus().statusCode()).isEqualTo("IMPORT_FAILED_INTERNAL");
}
@Test
void runImportAsync_readsFromConfiguredImportDir(@TempDir Path tempDir) {
// Empty temp dir → findSpreadsheetFile throws "no spreadsheet" with the
// configured path in the message. Proves the field, not a constant,
// drives the lookup.
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
service.runImportAsync();
assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.FAILED);
assertThat(service.getStatus().message()).contains(tempDir.toString());
}
@Test
void runImportAsync_setsStatusCode_IMPORT_FAILED_NO_SPREADSHEET_whenDirIsEmpty(@TempDir Path tempDir) {
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
service.runImportAsync();
assertThat(service.getStatus().statusCode()).isEqualTo("IMPORT_FAILED_NO_SPREADSHEET");
}
@Test
void runImportAsync_setsStatusCode_IMPORT_DONE_whenSpreadsheetHasNoDataRows(@TempDir Path tempDir) throws Exception {
Path xlsx = tempDir.resolve("import.xlsx");
try (XSSFWorkbook wb = new XSSFWorkbook()) {
wb.createSheet("Sheet1");
try (OutputStream out = Files.newOutputStream(xlsx)) {
wb.write(out);
}
}
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
service.runImportAsync();
assertThat(service.getStatus().statusCode()).isEqualTo("IMPORT_DONE");
}
@Test
void runImportAsync_throwsConflict_whenAlreadyRunning() {
MassImportService.ImportStatus running = new MassImportService.ImportStatus(
MassImportService.State.RUNNING, "IMPORT_RUNNING", "Running...", 0, List.of(), LocalDateTime.now());
ReflectionTestUtils.setField(service, "currentStatus", running);
assertThatThrownBy(() -> service.runImportAsync())
.isInstanceOf(DomainException.class)
.hasMessageContaining("already in progress");
}
// ─── importSingleDocument — skip already uploaded ─────────────────────────
@Test
void importSingleDocument_skips_whenDocumentAlreadyUploadedNotPlaceholder() {
Document existing = Document.builder()
.id(UUID.randomUUID())
.originalFilename("doc001.pdf")
.status(DocumentStatus.UPLOADED)
.build();
when(documentService.findByOriginalFilename("doc001.pdf")).thenReturn(Optional.of(existing));
Optional<MassImportService.SkipReason> result = service.importSingleDocument(minimalCells("doc001.pdf"), Optional.empty(), "doc001.pdf", "doc001");
verify(documentService, never()).save(any());
assertThat(result).isPresent().contains(MassImportService.SkipReason.ALREADY_EXISTS);
}
// ─── importSingleDocument — already-exists guard fires before file I/O ─────
@Test
void importSingleDocument_skipsWithAlreadyExists_whenDocumentUploadedAndFileIsPresent(@TempDir Path tempDir) throws Exception {
// Document already exists with status UPLOADED (not PLACEHOLDER).
// A physical PDF file is also present on disk (valid magic bytes).
// Expected: ALREADY_EXISTS is returned and no S3 upload is attempted —
// the guard fires before any file I/O, so no partial processing occurs.
Document existing = Document.builder()
.id(UUID.randomUUID())
.originalFilename("present.pdf")
.status(DocumentStatus.UPLOADED)
.build();
when(documentService.findByOriginalFilename("present.pdf")).thenReturn(Optional.of(existing));
Path physicalFile = tempDir.resolve("present.pdf");
byte[] pdfHeader = {0x25, 0x50, 0x44, 0x46, 0x2D}; // %PDF-
Files.write(physicalFile, pdfHeader);
Optional<MassImportService.SkipReason> result = service.importSingleDocument(
minimalCells("present.pdf"), Optional.of(physicalFile.toFile()), "present.pdf", "present");
assertThat(result).isPresent().contains(MassImportService.SkipReason.ALREADY_EXISTS);
verify(s3Client, never()).putObject(any(PutObjectRequest.class), any(RequestBody.class));
verify(documentService, never()).save(any());
}
// ─── importSingleDocument — S3 failure surfaced in skippedFiles ──────────
@Test
void runImportAsync_addsS3UploadFailed_toSkippedFiles_whenS3Throws(@TempDir Path tempDir) throws Exception {
byte[] pdfHeader = {0x25, 0x50, 0x44, 0x46, 0x2D}; // %PDF-
Files.write(tempDir.resolve("upload_fail.pdf"), pdfHeader);
buildMinimalImportXlsx(tempDir, "upload_fail.pdf");
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
when(documentService.findByOriginalFilename("upload_fail.pdf")).thenReturn(Optional.empty());
doThrow(new RuntimeException("S3 unavailable"))
.when(s3Client).putObject(any(PutObjectRequest.class), any(RequestBody.class));
service.runImportAsync();
assertThat(service.getStatus().skipped()).isEqualTo(1);
assertThat(service.getStatus().skippedFiles())
.extracting(MassImportService.SkippedFile::filename, MassImportService.SkippedFile::reason)
.containsExactly(org.assertj.core.groups.Tuple.tuple("upload_fail.pdf", MassImportService.SkipReason.S3_UPLOAD_FAILED));
}
@Test
void runImportAsync_addsAlreadyExists_toSkippedFiles_whenDocumentAlreadyUploaded(@TempDir Path tempDir) throws Exception {
buildMinimalImportXlsx(tempDir, "existing.pdf");
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
Document existing = Document.builder()
.id(UUID.randomUUID())
.originalFilename("existing.pdf")
.status(DocumentStatus.UPLOADED)
.build();
when(documentService.findByOriginalFilename("existing.pdf")).thenReturn(Optional.of(existing));
service.runImportAsync();
assertThat(service.getStatus().skipped()).isEqualTo(1);
assertThat(service.getStatus().skippedFiles())
.extracting(MassImportService.SkippedFile::reason)
.containsExactly(MassImportService.SkipReason.ALREADY_EXISTS);
}
// ─── importSingleDocument — create new document (metadata only) ───────────
@Test
void importSingleDocument_createsNewDocument_whenNotExists() {
when(documentService.findByOriginalFilename("doc002.pdf")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
service.importSingleDocument(minimalCells("doc002.pdf"), Optional.empty(), "doc002.pdf", "doc002");
verify(documentService).save(argThat(d ->
d.getOriginalFilename().equals("doc002.pdf")
&& d.getStatus() == DocumentStatus.PLACEHOLDER));
}
// ─── importSingleDocument — update existing placeholder ──────────────────
@Test
void importSingleDocument_updatesExistingPlaceholder() {
Document placeholder = Document.builder()
.id(UUID.randomUUID())
.originalFilename("existing.pdf")
.status(DocumentStatus.PLACEHOLDER)
.build();
when(documentService.findByOriginalFilename("existing.pdf")).thenReturn(Optional.of(placeholder));
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
service.importSingleDocument(minimalCells("existing.pdf"), Optional.empty(), "existing.pdf", "existing");
verify(documentService).save(same(placeholder));
}
// ─── importSingleDocument — with file (S3 upload) ─────────────────────────
@Test
void importSingleDocument_uploadsFileToS3_andSetsStatusUploaded(@TempDir Path tempDir) throws Exception {
Path tempFile = tempDir.resolve("doc003.pdf");
Files.write(tempFile, "PDF content".getBytes());
when(documentService.findByOriginalFilename("doc003.pdf")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
service.importSingleDocument(
minimalCells("doc003.pdf"), Optional.of(tempFile.toFile()), "doc003.pdf", "doc003");
verify(s3Client).putObject(any(PutObjectRequest.class), any(RequestBody.class));
verify(documentService).save(argThat(d -> d.getStatus() == DocumentStatus.UPLOADED));
}
@Test
void importSingleDocument_returnsS3UploadFailed_whenS3UploadFails(@TempDir Path tempDir) throws Exception {
Path tempFile = tempDir.resolve("fail.pdf");
Files.write(tempFile, "data".getBytes());
when(documentService.findByOriginalFilename("fail.pdf")).thenReturn(Optional.empty());
doThrow(new RuntimeException("S3 error"))
.when(s3Client).putObject(any(PutObjectRequest.class), any(RequestBody.class));
Optional<MassImportService.SkipReason> result = service.importSingleDocument(
minimalCells("fail.pdf"), Optional.of(tempFile.toFile()), "fail.pdf", "fail");
verify(documentService, never()).save(any());
assertThat(result).isPresent().contains(MassImportService.SkipReason.S3_UPLOAD_FAILED);
}
// ─── importSingleDocument — sender handling ───────────────────────────────
@Test
void importSingleDocument_setsNullSender_whenSenderCellIsBlank() {
when(documentService.findByOriginalFilename("nosender.pdf")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
List<String> cells = buildCells("nosender.pdf", "", "", "");
service.importSingleDocument(cells, Optional.empty(), "nosender.pdf", "nosender");
verify(documentService).save(argThat(d -> d.getSender() == null));
verify(personService, never()).findOrCreateByAlias(any());
}
@Test
void importSingleDocument_createsSender_whenSenderCellIsNonBlank() {
Person sender = Person.builder().id(UUID.randomUUID()).firstName("Walter").lastName("Müller").build();
when(documentService.findByOriginalFilename("withsender.pdf")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
when(personService.findOrCreateByAlias("Walter Müller")).thenReturn(sender);
List<String> cells = buildCells("withsender.pdf", "Walter Müller", "", "");
service.importSingleDocument(cells, Optional.empty(), "withsender.pdf", "withsender");
verify(personService).findOrCreateByAlias("Walter Müller");
verify(documentService).save(argThat(d -> d.getSender() == sender));
}
// ─── importSingleDocument — tag handling ─────────────────────────────────
@Test
void importSingleDocument_createsTag_whenTagCellIsNonBlank() {
Tag tag = Tag.builder().id(UUID.randomUUID()).name("Familie").build();
when(documentService.findByOriginalFilename("tagged.pdf")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
when(tagService.findOrCreate("Familie")).thenReturn(tag);
List<String> cells = buildCells("tagged.pdf", "", "", "Familie");
service.importSingleDocument(cells, Optional.empty(), "tagged.pdf", "tagged");
verify(tagService).findOrCreate("Familie");
}
@Test
void importSingleDocument_doesNotCreateTag_whenTagCellIsBlank() {
when(documentService.findByOriginalFilename("notag.pdf")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
List<String> cells = buildCells("notag.pdf", "", "", "");
service.importSingleDocument(cells, Optional.empty(), "notag.pdf", "notag");
verify(tagService, never()).findOrCreate(any());
}
// ─── importSingleDocument — metadataComplete heuristic ───────────────────
@Test
void importSingleDocument_metadataComplete_whenSenderPresent() {
Person sender = Person.builder().id(UUID.randomUUID()).firstName("A").lastName("B").build();
when(documentService.findByOriginalFilename("meta.pdf")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
when(personService.findOrCreateByAlias("A B")).thenReturn(sender);
List<String> cells = buildCells("meta.pdf", "A B", "", "");
service.importSingleDocument(cells, Optional.empty(), "meta.pdf", "meta");
verify(documentService).save(argThat(Document::isMetadataComplete));
}
@Test
void importSingleDocument_metadataIncomplete_whenNoKeyFieldsPresent() {
when(documentService.findByOriginalFilename("nometa.pdf")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
List<String> cells = buildCells("nometa.pdf", "", "", "");
service.importSingleDocument(cells, Optional.empty(), "nometa.pdf", "nometa");
verify(documentService).save(argThat(d -> !d.isMetadataComplete()));
}
// ─── importSingleDocument — blank fields set to null ─────────────────────
@Test
void importSingleDocument_setsBlankFieldsToNull() {
when(documentService.findByOriginalFilename("blank.pdf")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
List<String> cells = buildCells("blank.pdf", "", "", "");
service.importSingleDocument(cells, Optional.empty(), "blank.pdf", "blank");
verify(documentService).save(argThat(d ->
d.getLocation() == null &&
d.getSummary() == null &&
d.getTranscription() == null &&
d.getArchiveBox() == null &&
d.getArchiveFolder() == null));
}
// ─── processRows — via ReflectionTestUtils ────────────────────────────────
@Test
void processRows_returnsZero_whenOnlyHeaderRow() {
List<List<String>> rows = List.of(List.of("header", "col1"));
MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
assertThat(result.processed()).isEqualTo(0);
}
@Test
void processRows_skipsRowWithBlankIndex() {
List<List<String>> rows = List.of(
List.of("header"),
minimalCells("") // blank index
);
MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
assertThat(result.processed()).isEqualTo(0);
verify(documentService, never()).findByOriginalFilename(any());
}
@Test
void processRows_addsExtension_whenIndexHasNoDot() {
when(documentService.findByOriginalFilename("doc001.pdf")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
List<List<String>> rows = List.of(
List.of("header"),
minimalCells("doc001") // no dot → appends ".pdf"
);
MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
assertThat(result.processed()).isEqualTo(1);
verify(documentService).findByOriginalFilename("doc001.pdf");
}
@Test
void processRows_usesFilenameAsIs_whenIndexHasDot() {
when(documentService.findByOriginalFilename("doc002.pdf")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
List<List<String>> rows = List.of(
List.of("header"),
minimalCells("doc002.pdf") // has dot → used as-is
);
MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
assertThat(result.processed()).isEqualTo(1);
verify(documentService).findByOriginalFilename("doc002.pdf");
}
// ─── isValidImportFilename — security regression — do not remove ─────────
@Test
void isValidImportFilename_returnsFalse_whenFilenameIsNull() {
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", (String) null);
assertThat(result).isFalse();
}
@Test
void isValidImportFilename_returnsFalse_whenFilenameIsBlank() {
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", " ");
assertThat(result).isFalse();
}
@Test
void isValidImportFilename_returnsFalse_whenFilenameContainsForwardSlash() {
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "etc/passwd");
assertThat(result).isFalse();
}
@Test
void isValidImportFilename_returnsFalse_whenFilenameContainsBackslash() {
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "..\\etc\\passwd");
assertThat(result).isFalse();
}
@Test
void isValidImportFilename_returnsFalse_whenFilenameContainsDotDot() {
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "doc..evil.pdf");
assertThat(result).isFalse();
}
@Test
void isValidImportFilename_returnsFalse_whenFilenameIsDotDot() {
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "..");
assertThat(result).isFalse();
}
@Test
void isValidImportFilename_returnsFalse_whenFilenameIsAbsolutePath() {
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "/etc/passwd");
assertThat(result).isFalse();
}
@Test
void isValidImportFilename_returnsFalse_whenFilenameContainsNullByte() {
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "file\0.pdf");
assertThat(result).isFalse();
}
@Test
void isValidImportFilename_returnsTrue_whenFilenameIsPlainBasename() {
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "document.pdf");
assertThat(result).isTrue();
}
@Test
void isValidImportFilename_returnsFalse_whenFilenameContainsUnicodeDivisionSlash() {
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "foobar.pdf");
assertThat(result).isFalse();
}
@Test
void isValidImportFilename_returnsFalse_whenFilenameContainsFullwidthSlash() {
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "foobar.pdf");
assertThat(result).isFalse();
}
@Test
void isValidImportFilename_returnsFalse_whenFilenameContainsUnicodeReverseSolidus() {
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "foobar.pdf");
assertThat(result).isFalse();
}
@Test
void isValidImportFilename_returnsTrue_whenFilenameHasLeadingDot() {
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", ".hidden.pdf");
assertThat(result).isTrue();
}
@Test
void isValidImportFilename_returnsTrue_whenFilenameHasSpaces() {
boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "Brief an Oma.pdf");
assertThat(result).isTrue();
}
@Test
void processRows_skipsRowAndContinues_whenFilenameIsPathTraversal() {
when(documentService.findByOriginalFilename("legitimate.pdf")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
List<List<String>> rows = List.of(
List.of("header"),
minimalCells("../evil"), // row 1: path traversal — should be skipped
minimalCells("legitimate.pdf") // row 2: valid — should be processed
);
MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
assertThat(result.processed()).isEqualTo(1);
assertThat(result.skippedFiles())
.extracting(MassImportService.SkippedFile::reason)
.containsExactly(MassImportService.SkipReason.INVALID_FILENAME_PATH_TRAVERSAL);
}
// ─── importSingleDocument — non-blank optional fields ────────────────────
@Test
void importSingleDocument_setsNonNullOptionalFields_whenPresent() {
when(documentService.findByOriginalFilename("rich.pdf")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
// box=1, folder=2, location=9, summary=11, transcription=13
List<String> cells = List.of(
"rich.pdf", // 0: index
"Box A", // 1: box
"Folder B", // 2: folder
"", // 3: sender
"", // 4: unused
"", // 5: receivers
"", // 6: unused
"", // 7: date
"", // 8: unused
"Hamburg", // 9: location
"", // 10: tags
"A summary", // 11: summary
"", // 12: unused
"A transcript" // 13: transcription
);
service.importSingleDocument(cells, Optional.empty(), "rich.pdf", "rich");
verify(documentService).save(argThat(d ->
"Box A".equals(d.getArchiveBox()) &&
"Folder B".equals(d.getArchiveFolder()) &&
"Hamburg".equals(d.getLocation()) &&
"A summary".equals(d.getSummary()) &&
"A transcript".equals(d.getTranscription())));
}
@Test
void importSingleDocument_setsMetadataComplete_whenReceiversArePresent() {
Person receiver = Person.builder().id(UUID.randomUUID()).firstName("Walter").lastName("Müller").build();
when(documentService.findByOriginalFilename("rcv.pdf")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
when(personService.findOrCreateByAlias("Walter Müller")).thenReturn(receiver);
List<String> cells = List.of(
"rcv.pdf", "", "", "", "", "Walter Müller", "", "", "", "", "", "", "", "");
service.importSingleDocument(cells, Optional.empty(), "rcv.pdf", "rcv");
verify(documentService).save(argThat(Document::isMetadataComplete));
}
@Test
void importSingleDocument_setsMetadataComplete_whenDateIsPresent() {
when(documentService.findByOriginalFilename("dated.pdf")).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
List<String> cells = List.of(
"dated.pdf", "", "", "", "", "", "", "2024-03-15", "", "", "", "", "", "");
service.importSingleDocument(cells, Optional.empty(), "dated.pdf", "dated");
verify(documentService).save(argThat(Document::isMetadataComplete));
}
// ─── buildTitle — null location ───────────────────────────────────────────
@Test
void buildTitle_withNullLocation_skipsLocationPart() {
String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
"doc005", LocalDate.of(1940, 5, 1), (String) null);
assertThat(result).contains("doc005").contains("1940");
assertThat(result).doesNotContain("Berlin");
}
// ─── parseDate — via ReflectionTestUtils ─────────────────────────────────
@Test
void parseDate_returnsNull_whenValueIsNull() {
LocalDate result = ReflectionTestUtils.invokeMethod(service, "parseDate", (String) null);
assertThat(result).isNull();
}
@Test
void parseDate_returnsNull_whenValueIsBlank() {
LocalDate result = ReflectionTestUtils.invokeMethod(service, "parseDate", " ");
assertThat(result).isNull();
}
@Test
void parseDate_returnsDate_whenValidIsoFormat() {
LocalDate result = ReflectionTestUtils.invokeMethod(service, "parseDate", "2024-03-15");
assertThat(result).isEqualTo(LocalDate.of(2024, 3, 15));
}
@Test
void parseDate_returnsNull_whenInvalidDateString() {
LocalDate result = ReflectionTestUtils.invokeMethod(service, "parseDate", "15.03.2024");
assertThat(result).isNull();
}
// ─── buildTitle — via ReflectionTestUtils ────────────────────────────────
@Test
void buildTitle_withDateAndLocation() {
String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
"doc001", LocalDate.of(1940, 5, 1), "Berlin");
assertThat(result).contains("doc001").contains("Berlin").contains("1940");
}
@Test
void buildTitle_withDateOnly() {
String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
"doc002", LocalDate.of(1960, 8, 15), "");
assertThat(result).contains("doc002").contains("1960");
assertThat(result).doesNotContain("Berlin");
}
@Test
void buildTitle_withIndexOnly_whenDateAndLocationAreNull() {
String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
"doc003", null, "");
assertThat(result).isEqualTo("doc003");
}
@Test
void buildTitle_withLocationOnly_whenDateIsNull() {
// date=null, location present → date part skipped, location appended
String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
"doc004", null, "Berlin");
assertThat(result).contains("doc004").contains("Berlin");
assertThat(result).doesNotContain("("); // no date part
}
// ─── getCell — via ReflectionTestUtils ───────────────────────────────────
@Test
void getCell_returnsEmptyString_whenColBeyondListSize() {
List<String> cells = List.of("a", "b");
String result = ReflectionTestUtils.invokeMethod(service, "getCell", cells, 5);
assertThat(result).isEmpty();
}
@Test
void getCell_returnsEmptyString_whenValueIsNull() {
List<String> cells = new ArrayList<>();
cells.add(null);
cells.add("b");
String result = ReflectionTestUtils.invokeMethod(service, "getCell", cells, 0);
assertThat(result).isEmpty();
}
@Test
void getCell_returnsTrimmedValue() {
List<String> cells = List.of(" hello ", "world");
String result = ReflectionTestUtils.invokeMethod(service, "getCell", cells, 0);
assertThat(result).isEqualTo("hello");
}
// ─── PDF magic byte validation regression ─────────────────────────────────
@Test
void runImportAsync_uploadsValidPdf_andSkipsFakeOne(@TempDir Path tempDir) throws Exception {
setupOneValidOneFakeImport(tempDir);
service.runImportAsync();
verify(s3Client, times(1)).putObject(any(PutObjectRequest.class), any(RequestBody.class));
}
@Test
void runImportAsync_setsSkippedCount_toOne_whenOneFakeFile(@TempDir Path tempDir) throws Exception {
setupOneValidOneFakeImport(tempDir);
service.runImportAsync();
assertThat(service.getStatus().skipped()).isEqualTo(1);
}
@Test
void runImportAsync_includesRejectedFilename_inSkippedFiles(@TempDir Path tempDir) throws Exception {
setupOneValidOneFakeImport(tempDir);
service.runImportAsync();
assertThat(service.getStatus().skippedFiles())
.extracting(MassImportService.SkippedFile::filename)
.contains("fake.pdf");
}
@Test
void runImportAsync_skipsFile_whenShorterThanFourBytes(@TempDir Path tempDir) throws Exception {
Files.write(tempDir.resolve("tiny.pdf"), new byte[]{0x25, 0x50, 0x44}); // only 3 bytes
buildMinimalImportXlsx(tempDir, "tiny.pdf");
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
lenient().when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
service.runImportAsync();
assertThat(service.getStatus().skipped()).isEqualTo(1);
}
@Test
void runImportAsync_skipsFile_whenMagicBytesCheckThrowsIOException(@TempDir Path tempDir) throws Exception {
Files.writeString(tempDir.resolve("unreadable.pdf"), "some content");
buildMinimalImportXlsx(tempDir, "unreadable.pdf");
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
lenient().when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
MassImportService spyService = spy(service);
doThrow(new java.io.IOException("simulated read error")).when(spyService).openFileStream(any(File.class));
spyService.runImportAsync();
assertThat(spyService.getStatus().skipped()).isEqualTo(1);
assertThat(spyService.getStatus().skippedFiles())
.extracting(MassImportService.SkippedFile::reason)
.containsExactly(MassImportService.SkipReason.FILE_READ_ERROR);
}
// ─── findFileRecursive — symlink escape security regression — do not remove ─
@Test
void findFileRecursive_throwsDomainException_whenSymlinkEscapesImportDir(
@TempDir Path importDirPath, @TempDir Path outsideDir) throws Exception {
Path outsideFile = outsideDir.resolve("secret.pdf");
Files.writeString(outsideFile, "sensitive content");
Files.createSymbolicLink(importDirPath.resolve("secret.pdf"), outsideFile);
ReflectionTestUtils.setField(service, "importDir", importDirPath.toString());
assertThatThrownBy(() -> ReflectionTestUtils.invokeMethod(service, "findFileRecursive", "secret.pdf"))
.isInstanceOf(DomainException.class);
}
// ─── readOds — XXE security regression ───────────────────────────────────
// Security regression — do not remove.
@Test
void readOds_rejects_xxe_doctype_payload(@TempDir Path tempDir) throws Exception {
File malicious = buildXxeOds(tempDir, "file:///etc/hostname");
assertThatThrownBy(() -> service.readOds(malicious))
.isInstanceOf(SAXParseException.class)
.hasMessageContaining("DOCTYPE is disallowed");
}
@Test
void readOds_parses_valid_ods_correctly(@TempDir Path tempDir) throws Exception {
File valid = buildValidOds(tempDir, "Mustermann");
List<List<String>> rows = service.readOds(valid);
assertThat(rows).isNotEmpty();
assertThat(rows.get(0)).contains("Mustermann");
}
// ─── helpers ──────────────────────────────────────────────────────────────
/**
* Builds a minimal 14-element cell row with the given filename at index 0
* and blanks for all optional fields.
*/
private List<String> minimalCells(String filename) {
return buildCells(filename, "", "", "");
}
/**
* Builds a cell row with sender, receiver, and tag controls.
* Layout matches the default column indices set in setUp().
*/
private List<String> buildCells(String filename, String sender, String receivers, String tag) {
// 14 elements: index=0,box=1,folder=2,sender=3,[4],receivers=5,[6],date=7,[8],location=9,tag=10,summary=11,[12],transcription=13
return List.of(
filename, // 0: index
"", // 1: box
"", // 2: folder
sender, // 3: sender
"", // 4: (unused)
receivers, // 5: receivers
"", // 6: (unused)
"", // 7: date
"", // 8: (unused)
"", // 9: location
tag, // 10: tags
"", // 11: summary
"", // 12: (unused)
"" // 13: transcription
);
}
/** Creates a minimal ODS ZIP containing a content.xml with an XXE payload. */
private File buildXxeOds(Path dir, String entityTarget) throws Exception {
String xml = "<?xml version=\"1.0\"?>"
+ "<!DOCTYPE foo [<!ENTITY xxe SYSTEM \"" + entityTarget + "\">]>"
+ "<office:document-content"
+ " xmlns:office=\"urn:oasis:names:tc:opendocument:xmlns:office:1.0\""
+ " xmlns:table=\"urn:oasis:names:tc:opendocument:xmlns:table:1.0\""
+ " xmlns:text=\"urn:oasis:names:tc:opendocument:xmlns:text:1.0\">"
+ "<office:body><office:spreadsheet>"
+ "<table:table><table:table-row><table:table-cell>"
+ "<text:p>&xxe;</text:p>"
+ "</table:table-cell></table:table-row></table:table>"
+ "</office:spreadsheet></office:body>"
+ "</office:document-content>";
return writeOdsZip(dir.resolve("malicious.ods"), xml);
}
/** Creates a minimal valid ODS ZIP containing a content.xml with the given cell value.
* cellValue must not contain XML metacharacters ({@code < > &}). */
private File buildValidOds(Path dir, String cellValue) throws Exception {
String xml = "<?xml version=\"1.0\"?>"
+ "<office:document-content"
+ " xmlns:office=\"urn:oasis:names:tc:opendocument:xmlns:office:1.0\""
+ " xmlns:table=\"urn:oasis:names:tc:opendocument:xmlns:table:1.0\""
+ " xmlns:text=\"urn:oasis:names:tc:opendocument:xmlns:text:1.0\">"
+ "<office:body><office:spreadsheet>"
+ "<table:table><table:table-row><table:table-cell>"
+ "<text:p>" + cellValue + "</text:p>"
+ "</table:table-cell></table:table-row></table:table>"
+ "</office:spreadsheet></office:body>"
+ "</office:document-content>";
return writeOdsZip(dir.resolve("valid.ods"), xml);
}
private File writeOdsZip(Path destination, String contentXml) throws Exception {
try (OutputStream fos = Files.newOutputStream(destination);
ZipOutputStream zip = new ZipOutputStream(fos)) {
zip.putNextEntry(new ZipEntry("content.xml"));
zip.write(contentXml.getBytes(StandardCharsets.UTF_8));
zip.closeEntry();
}
return destination.toFile();
}
private void setupOneValidOneFakeImport(Path tempDir) throws Exception {
byte[] pdfHeader = {0x25, 0x50, 0x44, 0x46, 0x2D}; // %PDF-
Files.write(tempDir.resolve("real.pdf"), pdfHeader);
Files.writeString(tempDir.resolve("fake.pdf"), "not a pdf");
buildMinimalImportXlsx(tempDir, "real.pdf", "fake.pdf");
ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
}
private void buildMinimalImportXlsx(Path dir, String... filenames) throws Exception {
Path xlsx = dir.resolve("import.xlsx");
try (XSSFWorkbook wb = new XSSFWorkbook()) {
org.apache.poi.ss.usermodel.Sheet sheet = wb.createSheet("Sheet1");
sheet.createRow(0).createCell(0).setCellValue("Index");
for (int i = 0; i < filenames.length; i++) {
sheet.createRow(i + 1).createCell(0).setCellValue(filenames[i]);
}
try (OutputStream out = Files.newOutputStream(xlsx)) {
wb.write(out);
}
}
}
}

View File

@@ -0,0 +1,130 @@
package org.raddatz.familienarchiv.importing;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.io.TempDir;
import org.mockito.ArgumentCaptor;
import org.mockito.junit.jupiter.MockitoExtension;
import org.raddatz.familienarchiv.person.Person;
import org.raddatz.familienarchiv.person.PersonService;
import org.raddatz.familienarchiv.person.PersonUpsertCommand;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
@ExtendWith(MockitoExtension.class)
class PersonRegisterImporterTest {
@Test
void load_upsertsPersonBySourceRef_withProvisionalFalse(@TempDir Path tempDir) throws Exception {
PersonService personService = mock(PersonService.class);
when(personService.upsertBySourceRef(any())).thenAnswer(inv -> personOf(inv.getArgument(0)));
Path xlsx = writePersons(tempDir, row(
"allemeyer-elsgard", "Allemeyer", "Elsgard", "Wöhler", "Nichte von Herbert", "False"));
new PersonRegisterImporter(personService).load(xlsx.toFile());
ArgumentCaptor<PersonUpsertCommand> captor = ArgumentCaptor.forClass(PersonUpsertCommand.class);
verify(personService).upsertBySourceRef(captor.capture());
PersonUpsertCommand cmd = captor.getValue();
assertThat(cmd.sourceRef()).isEqualTo("allemeyer-elsgard");
assertThat(cmd.lastName()).isEqualTo("Allemeyer");
assertThat(cmd.firstName()).isEqualTo("Elsgard");
assertThat(cmd.maidenName()).isEqualTo("Wöhler");
assertThat(cmd.notes()).isEqualTo("Nichte von Herbert");
assertThat(cmd.provisional()).isFalse();
}
@Test
void load_parsesCapitalisedPythonBool_True(@TempDir Path tempDir) throws Exception {
PersonService personService = mock(PersonService.class);
when(personService.upsertBySourceRef(any())).thenAnswer(inv -> personOf(inv.getArgument(0)));
Path xlsx = writePersons(tempDir, row(
"noise-geschirr", "Geschirr", "", "", "", "True"));
new PersonRegisterImporter(personService).load(xlsx.toFile());
ArgumentCaptor<PersonUpsertCommand> captor = ArgumentCaptor.forClass(PersonUpsertCommand.class);
verify(personService).upsertBySourceRef(captor.capture());
assertThat(captor.getValue().provisional()).isTrue();
}
@Test
void load_skipsRowWithBlankPersonId(@TempDir Path tempDir) throws Exception {
PersonService personService = mock(PersonService.class);
Path xlsx = writePersons(tempDir, row("", "NoId", "", "", "", "False"));
new PersonRegisterImporter(personService).load(xlsx.toFile());
verify(personService, times(0)).upsertBySourceRef(any());
}
@Test
void load_returnsCountOfProcessedRows(@TempDir Path tempDir) throws Exception {
PersonService personService = mock(PersonService.class);
when(personService.upsertBySourceRef(any())).thenAnswer(inv -> personOf(inv.getArgument(0)));
Path xlsx = writePersons(tempDir,
row("a-one", "One", "A", "", "", "False"),
row("a-two", "Two", "B", "", "", "False"));
int processed = new PersonRegisterImporter(personService).load(xlsx.toFile());
assertThat(processed).isEqualTo(2);
}
private static Person personOf(PersonUpsertCommand cmd) {
return Person.builder().id(UUID.randomUUID()).sourceRef(cmd.sourceRef())
.firstName(cmd.firstName()).lastName(cmd.lastName())
.provisional(cmd.provisional()).build();
}
private Map<String, String> row(String personId, String lastName, String firstName,
String maidenName, String notes, String provisional) {
Map<String, String> r = new LinkedHashMap<>();
r.put("person_id", personId);
r.put("last_name", lastName);
r.put("first_name", firstName);
r.put("maiden_name", maidenName);
r.put("notes", notes);
r.put("provisional", provisional);
return r;
}
@SafeVarargs
private Path writePersons(Path dir, Map<String, String>... rows) throws Exception {
Path xlsx = dir.resolve("canonical-persons.xlsx");
List<String> headers = List.of("person_id", "last_name", "first_name", "maiden_name", "notes", "provisional");
try (XSSFWorkbook wb = new XSSFWorkbook()) {
Sheet sheet = wb.createSheet("Sheet1");
Row header = sheet.createRow(0);
for (int i = 0; i < headers.size(); i++) {
header.createCell(i).setCellValue(headers.get(i));
}
for (int r = 0; r < rows.length; r++) {
Row row = sheet.createRow(r + 1);
for (int c = 0; c < headers.size(); c++) {
row.createCell(c).setCellValue(rows[r].getOrDefault(headers.get(c), ""));
}
}
try (OutputStream out = Files.newOutputStream(xlsx)) {
wb.write(out);
}
}
return xlsx;
}
}

View File

@@ -0,0 +1,163 @@
package org.raddatz.familienarchiv.importing;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.io.TempDir;
import org.mockito.ArgumentCaptor;
import org.mockito.junit.jupiter.MockitoExtension;
import org.raddatz.familienarchiv.exception.DomainException;
import org.raddatz.familienarchiv.exception.ErrorCode;
import org.raddatz.familienarchiv.person.Person;
import org.raddatz.familienarchiv.person.PersonService;
import org.raddatz.familienarchiv.person.PersonUpsertCommand;
import org.raddatz.familienarchiv.person.relationship.RelationType;
import org.raddatz.familienarchiv.person.relationship.RelationshipService;
import org.raddatz.familienarchiv.person.relationship.dto.CreateRelationshipRequest;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.UUID;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
@ExtendWith(MockitoExtension.class)
class PersonTreeImporterTest {
@Test
void load_upsertsTreePersonBySourceRef_withFamilyMemberFlag(@TempDir Path tempDir) throws Exception {
PersonService personService = mock(PersonService.class);
RelationshipService relationshipService = mock(RelationshipService.class);
when(personService.upsertBySourceRef(any())).thenAnswer(inv -> personOf(inv.getArgument(0)));
Path json = write(tempDir, """
{"persons":[
{"rowId":"row_002","firstName":"Elsgard","lastName":"Allemeyer","maidenName":"Wöhler",
"notes":"Nichte","birthYear":1920,"deathYear":1999,"familyMember":true,"personId":"allemeyer-elsgard"}
],"relationships":[]}
""");
new PersonTreeImporter(personService, relationshipService)
.load(json.toFile());
ArgumentCaptor<PersonUpsertCommand> captor = ArgumentCaptor.forClass(PersonUpsertCommand.class);
verify(personService).upsertBySourceRef(captor.capture());
PersonUpsertCommand cmd = captor.getValue();
assertThat(cmd.sourceRef()).isEqualTo("allemeyer-elsgard");
assertThat(cmd.familyMember()).isTrue();
assertThat(cmd.provisional()).isFalse();
}
@Test
void load_createsRelationship_resolvingRowIdsToUpsertedPersons(@TempDir Path tempDir) throws Exception {
PersonService personService = mock(PersonService.class);
RelationshipService relationshipService = mock(RelationshipService.class);
UUID idA = UUID.randomUUID();
UUID idB = UUID.randomUUID();
when(personService.upsertBySourceRef(any())).thenAnswer(inv -> {
PersonUpsertCommand c = inv.getArgument(0);
return Person.builder().id(c.sourceRef().equals("a") ? idA : idB)
.sourceRef(c.sourceRef()).lastName(c.lastName()).build();
});
Path json = write(tempDir, """
{"persons":[
{"rowId":"row_a","lastName":"A","familyMember":true,"personId":"a"},
{"rowId":"row_b","lastName":"B","familyMember":true,"personId":"b"}
],"relationships":[
{"personId":"row_a","relatedPersonId":"row_b","type":"SPOUSE_OF","source":"verheiratet_mit"}
]}
""");
new PersonTreeImporter(personService, relationshipService)
.load(json.toFile());
ArgumentCaptor<CreateRelationshipRequest> captor = ArgumentCaptor.forClass(CreateRelationshipRequest.class);
verify(relationshipService).addRelationship(eq(idA), captor.capture());
assertThat(captor.getValue().relatedPersonId()).isEqualTo(idB);
assertThat(captor.getValue().relationType()).isEqualTo(RelationType.SPOUSE_OF);
}
@Test
void load_swallowsDuplicateRelationship_forIdempotentReimport(@TempDir Path tempDir) throws Exception {
PersonService personService = mock(PersonService.class);
RelationshipService relationshipService = mock(RelationshipService.class);
when(personService.upsertBySourceRef(any()))
.thenAnswer(inv -> personOf(inv.getArgument(0)));
doThrow(DomainException.conflict(ErrorCode.DUPLICATE_RELATIONSHIP, "exists"))
.when(relationshipService).addRelationship(any(), any());
Path json = write(tempDir, """
{"persons":[
{"rowId":"row_a","lastName":"A","familyMember":true,"personId":"a"},
{"rowId":"row_b","lastName":"B","familyMember":true,"personId":"b"}
],"relationships":[
{"personId":"row_a","relatedPersonId":"row_b","type":"SPOUSE_OF","source":"verheiratet_mit"}
]}
""");
PersonTreeImporter importer = new PersonTreeImporter(personService, relationshipService);
// Must not propagate the conflict — re-import is idempotent.
importer.load(json.toFile());
verify(relationshipService).addRelationship(any(), any());
}
@Test
void load_propagatesUnexpectedDomainException_fromAddRelationship(@TempDir Path tempDir) throws Exception {
PersonService personService = mock(PersonService.class);
RelationshipService relationshipService = mock(RelationshipService.class);
when(personService.upsertBySourceRef(any()))
.thenAnswer(inv -> personOf(inv.getArgument(0)));
// An unexpected ErrorCode (not DUPLICATE/CIRCULAR) must NOT be swallowed.
doThrow(DomainException.internal(ErrorCode.INTERNAL_ERROR, "boom"))
.when(relationshipService).addRelationship(any(), any());
Path json = write(tempDir, """
{"persons":[
{"rowId":"row_a","lastName":"A","familyMember":true,"personId":"a"},
{"rowId":"row_b","lastName":"B","familyMember":true,"personId":"b"}
],"relationships":[
{"personId":"row_a","relatedPersonId":"row_b","type":"SPOUSE_OF","source":"verheiratet_mit"}
]}
""");
PersonTreeImporter importer = new PersonTreeImporter(personService, relationshipService);
assertThatThrownBy(() -> importer.load(json.toFile()))
.isInstanceOf(DomainException.class)
.extracting("code").isEqualTo(ErrorCode.INTERNAL_ERROR);
}
@Test
void load_skipsRelationship_whenRowIdUnresolved(@TempDir Path tempDir) throws Exception {
PersonService personService = mock(PersonService.class);
RelationshipService relationshipService = mock(RelationshipService.class);
when(personService.upsertBySourceRef(any())).thenAnswer(inv -> personOf(inv.getArgument(0)));
Path json = write(tempDir, """
{"persons":[
{"rowId":"row_a","lastName":"A","familyMember":true,"personId":"a"}
],"relationships":[
{"personId":"row_a","relatedPersonId":"row_ghost","type":"SPOUSE_OF","source":"x"}
]}
""");
new PersonTreeImporter(personService, relationshipService)
.load(json.toFile());
verify(relationshipService, org.mockito.Mockito.never()).addRelationship(any(), any());
}
private static Person personOf(PersonUpsertCommand cmd) {
return Person.builder().id(UUID.randomUUID()).sourceRef(cmd.sourceRef()).lastName(cmd.lastName()).build();
}
private Path write(Path dir, String json) throws Exception {
Path file = dir.resolve("canonical-persons-tree.json");
Files.writeString(file, json);
return file;
}
}

View File

@@ -0,0 +1,103 @@
package org.raddatz.familienarchiv.importing;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.io.TempDir;
import org.mockito.junit.jupiter.MockitoExtension;
import org.raddatz.familienarchiv.tag.Tag;
import org.raddatz.familienarchiv.tag.TagService;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.UUID;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.ArgumentMatchers.isNull;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
@ExtendWith(MockitoExtension.class)
class TagTreeImporterTest {
@Test
void load_upsertsRootTagWithNullParent(@TempDir Path tempDir) throws Exception {
TagService tagService = mock(TagService.class);
when(tagService.upsertBySourceRef(any(), any(), any()))
.thenAnswer(inv -> tagOf(inv.getArgument(0), inv.getArgument(1), inv.getArgument(2)));
Path xlsx = writeTagTree(tempDir, List.<String[]>of(
new String[]{"Themen", "", "Themen"}));
new TagTreeImporter(tagService).load(xlsx.toFile());
verify(tagService).upsertBySourceRef("Themen", "Themen", null);
}
@Test
void load_resolvesParentByPath_forChildTag(@TempDir Path tempDir) throws Exception {
TagService tagService = mock(TagService.class);
UUID rootId = UUID.randomUUID();
when(tagService.upsertBySourceRef(eq("Themen"), eq("Themen"), isNull()))
.thenReturn(tagOf("Themen", "Themen", null, rootId));
when(tagService.upsertBySourceRef(eq("Themen/Brautbriefe"), eq("Brautbriefe"), eq(rootId)))
.thenReturn(tagOf("Themen/Brautbriefe", "Brautbriefe", rootId));
Path xlsx = writeTagTree(tempDir, List.<String[]>of(
new String[]{"Themen", "", "Themen"},
new String[]{"Themen/Brautbriefe", "Themen", "Brautbriefe"}));
new TagTreeImporter(tagService).load(xlsx.toFile());
verify(tagService).upsertBySourceRef("Themen/Brautbriefe", "Brautbriefe", rootId);
}
@Test
void load_returnsCountOfProcessedRows(@TempDir Path tempDir) throws Exception {
TagService tagService = mock(TagService.class);
when(tagService.upsertBySourceRef(any(), any(), any()))
.thenAnswer(inv -> tagOf(inv.getArgument(0), inv.getArgument(1), inv.getArgument(2)));
Path xlsx = writeTagTree(tempDir, List.<String[]>of(
new String[]{"Themen", "", "Themen"},
new String[]{"Themen/Brautbriefe", "Themen", "Brautbriefe"}));
int processed = new TagTreeImporter(tagService).load(xlsx.toFile());
assertThat(processed).isEqualTo(2);
}
private static Tag tagOf(String sourceRef, String name, UUID parentId) {
return tagOf(sourceRef, name, parentId, UUID.randomUUID());
}
private static Tag tagOf(String sourceRef, String name, UUID parentId, UUID id) {
return Tag.builder().id(id).sourceRef(sourceRef).name(name).parentId(parentId).build();
}
private Path writeTagTree(Path dir, List<String[]> rows) throws Exception {
Path xlsx = dir.resolve("canonical-tag-tree.xlsx");
try (XSSFWorkbook wb = new XSSFWorkbook()) {
Sheet sheet = wb.createSheet("Sheet1");
Row header = sheet.createRow(0);
header.createCell(0).setCellValue("tag_path");
header.createCell(1).setCellValue("parent_name");
header.createCell(2).setCellValue("tag_name");
for (int r = 0; r < rows.size(); r++) {
Row row = sheet.createRow(r + 1);
String[] values = rows.get(r);
for (int c = 0; c < values.length; c++) {
row.createCell(c).setCellValue(values[c]);
}
}
try (OutputStream out = Files.newOutputStream(xlsx)) {
wb.write(out);
}
}
return xlsx;
}
}

View File

@@ -0,0 +1,151 @@
package org.raddatz.familienarchiv.person;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import java.util.Optional;
import java.util.UUID;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.argThat;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
@ExtendWith(MockitoExtension.class)
class PersonImportUpsertTest {
@Mock PersonRepository personRepository;
@Mock PersonNameAliasRepository aliasRepository;
@InjectMocks PersonService personService;
@Test
void upsertBySourceRef_insertsNewPerson_whenSourceRefUnknown() {
when(personRepository.findBySourceRef("clara-cram")).thenReturn(Optional.empty());
when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
PersonUpsertCommand cmd = PersonUpsertCommand.builder()
.sourceRef("clara-cram").firstName("Clara").lastName("Cram")
.personType(PersonType.PERSON).provisional(false).build();
Person result = personService.upsertBySourceRef(cmd);
assertThat(result.getSourceRef()).isEqualTo("clara-cram");
assertThat(result.getFirstName()).isEqualTo("Clara");
assertThat(result.getLastName()).isEqualTo("Cram");
assertThat(result.isProvisional()).isFalse();
}
@Test
void upsertBySourceRef_updatesInPlace_whenSourceRefExists() {
Person existing = Person.builder()
.id(UUID.randomUUID()).sourceRef("clara-cram")
.firstName("Clara").lastName("Cram").build();
when(personRepository.findBySourceRef("clara-cram")).thenReturn(Optional.of(existing));
when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
PersonUpsertCommand cmd = PersonUpsertCommand.builder()
.sourceRef("clara-cram").firstName("Clara").lastName("Cram")
.notes("Updated note").personType(PersonType.PERSON).provisional(false).build();
personService.upsertBySourceRef(cmd);
verify(personRepository).save(argThat(p -> p.getId().equals(existing.getId())));
verify(personRepository, never()).save(argThat(p -> p.getId() == null));
}
@Test
void upsertBySourceRef_preservesHumanEditedNonBlankFields() {
// A human renamed the maiden-name register person and added notes in-app.
Person humanEdited = Person.builder()
.id(UUID.randomUUID()).sourceRef("clara-cram")
.firstName("Klara").lastName("Cram-Müller").notes("Verified by Marcel").build();
when(personRepository.findBySourceRef("clara-cram")).thenReturn(Optional.of(humanEdited));
when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
PersonUpsertCommand cmd = PersonUpsertCommand.builder()
.sourceRef("clara-cram").firstName("Clara").lastName("Cram")
.notes("Auto note").personType(PersonType.PERSON).provisional(false).build();
Person result = personService.upsertBySourceRef(cmd);
// Human edits survive the re-import.
assertThat(result.getFirstName()).isEqualTo("Klara");
assertThat(result.getLastName()).isEqualTo("Cram-Müller");
assertThat(result.getNotes()).isEqualTo("Verified by Marcel");
}
@Test
void upsertBySourceRef_fillsOnlyBlankFields_onReimport() {
Person existing = Person.builder()
.id(UUID.randomUUID()).sourceRef("clara-cram")
.firstName("Clara").lastName("Cram").notes(null).build();
when(personRepository.findBySourceRef("clara-cram")).thenReturn(Optional.of(existing));
when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
PersonUpsertCommand cmd = PersonUpsertCommand.builder()
.sourceRef("clara-cram").firstName("Clara").lastName("Cram")
.notes("Nichte von Herbert").personType(PersonType.PERSON).provisional(false).build();
Person result = personService.upsertBySourceRef(cmd);
// Blank field gets filled by canonical value.
assertThat(result.getNotes()).isEqualTo("Nichte von Herbert");
}
@Test
void upsertBySourceRef_fillsBlankYears_butPreservesHumanEditedYears_onReimport() {
// Existing has a human-set birthYear and a blank deathYear.
Person existing = Person.builder()
.id(UUID.randomUUID()).sourceRef("clara-cram")
.lastName("Cram").birthYear(1890).deathYear(null).build();
when(personRepository.findBySourceRef("clara-cram")).thenReturn(Optional.of(existing));
when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
PersonUpsertCommand cmd = PersonUpsertCommand.builder()
.sourceRef("clara-cram").lastName("Cram")
.birthYear(1888).deathYear(1965)
.personType(PersonType.PERSON).provisional(false).build();
Person result = personService.upsertBySourceRef(cmd);
assertThat(result.getBirthYear()).isEqualTo(1890); // human value kept
assertThat(result.getDeathYear()).isEqualTo(1965); // blank filled from canonical
}
@Test
void upsertBySourceRef_neverFlipsProvisionalBackToTrue_onceHumanConfirmed() {
// A human confirmed this provisional importer-created person (provisional -> false).
Person confirmed = Person.builder()
.id(UUID.randomUUID()).sourceRef("schwester-hanni")
.firstName(null).lastName("Schwester Hanni").provisional(false).build();
when(personRepository.findBySourceRef("schwester-hanni")).thenReturn(Optional.of(confirmed));
when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
PersonUpsertCommand cmd = PersonUpsertCommand.builder()
.sourceRef("schwester-hanni").lastName("Schwester Hanni")
.personType(PersonType.PERSON).provisional(true).build();
Person result = personService.upsertBySourceRef(cmd);
assertThat(result.isProvisional()).isFalse();
}
@Test
void upsertBySourceRef_setsProvisionalTrue_forNewProvisionalPerson() {
when(personRepository.findBySourceRef("noise-geschirr")).thenReturn(Optional.empty());
when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
PersonUpsertCommand cmd = PersonUpsertCommand.builder()
.sourceRef("noise-geschirr").lastName("Tante Tüten")
.personType(PersonType.PERSON).provisional(true).build();
Person result = personService.upsertBySourceRef(cmd);
assertThat(result.isProvisional()).isTrue();
}
}

View File

@@ -0,0 +1,62 @@
package org.raddatz.familienarchiv.tag;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import java.util.Optional;
import java.util.UUID;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.argThat;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
@ExtendWith(MockitoExtension.class)
class TagImportUpsertTest {
@Mock TagRepository tagRepository;
@InjectMocks TagService tagService;
@Test
void upsertBySourceRef_insertsNewTag_whenSourceRefUnknown() {
when(tagRepository.findBySourceRef("Themen/Brautbriefe")).thenReturn(Optional.empty());
when(tagRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
UUID parentId = UUID.randomUUID();
Tag result = tagService.upsertBySourceRef("Themen/Brautbriefe", "Brautbriefe", parentId);
assertThat(result.getSourceRef()).isEqualTo("Themen/Brautbriefe");
assertThat(result.getName()).isEqualTo("Brautbriefe");
assertThat(result.getParentId()).isEqualTo(parentId);
}
@Test
void upsertBySourceRef_updatesInPlace_whenSourceRefExists() {
Tag existing = Tag.builder().id(UUID.randomUUID()).name("Brautbriefe")
.sourceRef("Themen/Brautbriefe").build();
when(tagRepository.findBySourceRef("Themen/Brautbriefe")).thenReturn(Optional.of(existing));
when(tagRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
tagService.upsertBySourceRef("Themen/Brautbriefe", "Brautbriefe", null);
verify(tagRepository).save(argThat(t -> t.getId().equals(existing.getId())));
verify(tagRepository, never()).save(argThat(t -> t.getId() == null));
}
@Test
void upsertBySourceRef_preservesHumanRenamedTag_onReimport() {
Tag humanRenamed = Tag.builder().id(UUID.randomUUID()).name("Verlobungsbriefe")
.sourceRef("Themen/Brautbriefe").build();
when(tagRepository.findBySourceRef("Themen/Brautbriefe")).thenReturn(Optional.of(humanRenamed));
when(tagRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
Tag result = tagService.upsertBySourceRef("Themen/Brautbriefe", "Brautbriefe", null);
assertThat(result.getName()).isEqualTo("Verlobungsbriefe");
}
}

View File

@@ -7,7 +7,8 @@ import org.raddatz.familienarchiv.security.PermissionAspect;
import org.raddatz.familienarchiv.user.CustomUserDetailsService; import org.raddatz.familienarchiv.user.CustomUserDetailsService;
import org.raddatz.familienarchiv.document.DocumentService; import org.raddatz.familienarchiv.document.DocumentService;
import org.raddatz.familienarchiv.document.DocumentVersionService; import org.raddatz.familienarchiv.document.DocumentVersionService;
import org.raddatz.familienarchiv.importing.MassImportService; import org.raddatz.familienarchiv.importing.CanonicalImportOrchestrator;
import org.raddatz.familienarchiv.importing.ImportStatus;
import org.raddatz.familienarchiv.document.ThumbnailBackfillService; import org.raddatz.familienarchiv.document.ThumbnailBackfillService;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.aop.AopAutoConfiguration; import org.springframework.boot.autoconfigure.aop.AopAutoConfiguration;
@@ -35,7 +36,7 @@ class AdminControllerTest {
@Autowired MockMvc mockMvc; @Autowired MockMvc mockMvc;
@MockitoBean MassImportService massImportService; @MockitoBean CanonicalImportOrchestrator importOrchestrator;
@MockitoBean DocumentService documentService; @MockitoBean DocumentService documentService;
@MockitoBean DocumentVersionService documentVersionService; @MockitoBean DocumentVersionService documentVersionService;
@MockitoBean ThumbnailBackfillService thumbnailBackfillService; @MockitoBean ThumbnailBackfillService thumbnailBackfillService;
@@ -46,9 +47,9 @@ class AdminControllerTest {
@Test @Test
@WithMockUser(authorities = "ADMIN") @WithMockUser(authorities = "ADMIN")
void importStatus_returns200_withStatusCode_whenAdmin() throws Exception { void importStatus_returns200_withStatusCode_whenAdmin() throws Exception {
MassImportService.ImportStatus status = new MassImportService.ImportStatus( ImportStatus status = new ImportStatus(
MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null); ImportStatus.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null);
when(massImportService.getStatus()).thenReturn(status); when(importOrchestrator.getStatus()).thenReturn(status);
mockMvc.perform(get("/api/admin/import-status")) mockMvc.perform(get("/api/admin/import-status"))
.andExpect(status().isOk()) .andExpect(status().isOk())
@@ -60,9 +61,9 @@ class AdminControllerTest {
@Test @Test
@WithMockUser(authorities = "ADMIN") @WithMockUser(authorities = "ADMIN")
void importStatus_messageField_notPresentInApiResponse() throws Exception { void importStatus_messageField_notPresentInApiResponse() throws Exception {
MassImportService.ImportStatus status = new MassImportService.ImportStatus( ImportStatus status = new ImportStatus(
MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null); ImportStatus.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null);
when(massImportService.getStatus()).thenReturn(status); when(importOrchestrator.getStatus()).thenReturn(status);
mockMvc.perform(get("/api/admin/import-status")) mockMvc.perform(get("/api/admin/import-status"))
.andExpect(status().isOk()) .andExpect(status().isOk())

View File

@@ -559,20 +559,40 @@ bash scripts/download-kraken-models.sh
> Downloads the Kurrent/Sütterlin HTR models. Run once after a fresh clone or when models are updated. > Downloads the Kurrent/Sütterlin HTR models. Run once after a fresh clone or when models are updated.
### Trigger a mass import (Excel/ODS) ### Trigger a canonical import
**Dev:** drop the ODS spreadsheet + PDFs into `./import/` at the repo root — the dev compose bind-mounts it to `/import` automatically. The importer no longer parses the raw spreadsheet. It consumes the **canonical artifacts**
produced by the normalizer (`tools/import-normalizer/`) — `canonical-tag-tree.xlsx`,
`canonical-persons.xlsx`, `canonical-persons-tree.json`, `canonical-documents.xlsx` — which
are committed under `tools/import-normalizer/out/`. The semantic transformation
(German-date parsing, name classification) lives entirely in the normalizer; the backend
maps the clean columns by header name. See [ADR-025](adr/025-canonical-import-and-single-migration-schema-foundation.md).
**Prerequisite — regenerate the artifacts when the source data changes:**
```bash
cd tools/import-normalizer
python3 -m venv .venv && .venv/bin/pip install -r requirements.txt # once, on a fresh clone
.venv/bin/python normalize.py
# writes the four canonical artifacts into ./out/
```
**Dev:** place all four canonical artifacts **plus** the referenced PDFs into `./import/`
at the repo root (the dev compose bind-mounts it to `/import`, which is `app.import.dir`).
The orchestrator smoke-checks that all four artifacts are present before starting and fails
closed (`IMPORT_ARTIFACT_INVALID`) if any is missing.
**Staging/production:** **Staging/production:**
1. Pre-stage the payload on the host. Convention: `/srv/familienarchiv-staging/import/` or `/srv/familienarchiv-production/import/`. 1. Pre-stage the four canonical artifacts + PDFs on the host. Convention:
`/srv/familienarchiv-staging/import/` or `/srv/familienarchiv-production/import/`.
```bash ```bash
rsync -avh --progress ./import/ user@host:/srv/familienarchiv-staging/import/ rsync -avh --progress ./import/ user@host:/srv/familienarchiv-staging/import/
``` ```
2. Make sure `IMPORT_HOST_DIR=<host-path>` is set in `.env.staging` / `.env.production` (the nightly/release workflows already write this — see §3). Compose refuses to start without it. 2. Make sure `IMPORT_HOST_DIR=<host-path>` is set in `.env.staging` / `.env.production` (the nightly/release workflows already write this — see §3). Compose refuses to start without it.
3. Redeploy the stack so the bind mount picks up — or, if the mount is already in place, skip to step 4. 3. Redeploy the stack so the bind mount picks up — or, if the mount is already in place, skip to step 4.
4. Call `POST /api/admin/trigger-import` (requires `ADMIN` permission), or click the "Import starten" button on `/admin/system`. 4. Call `POST /api/admin/trigger-import` (requires `ADMIN` permission), or click the "Import starten" button on `/admin/system`.
5. The import runs asynchronously — poll `GET /api/admin/import-status`, watch `/admin/system`, or tail the backend logs. 5. The import runs asynchronously — poll `GET /api/admin/import-status`, watch `/admin/system`, or tail the backend logs. Re-running is safe and idempotent (upsert by `source_ref` / document `index`). Person and tag scalar fields you edited in the app are preserved on re-import; a document's sender/receivers/tags are **canonical-authoritative** — a re-import re-applies them to exactly match the export, so a link removed from the export is removed from the document (the raw sender/receiver cell text is always kept).
--- ---

View File

@@ -64,9 +64,13 @@ _See also [Annotation](#annotation-documentannotation)._
- `REVIEWED`: a reviewer has approved the transcription. - `REVIEWED`: a reviewer has approved the transcription.
- `ARCHIVED`: the document is finalized and read-only. - `ARCHIVED`: the document is finalized and read-only.
**Mass import** — an asynchronous batch process (`MassImportService`) that reads an Excel or ODS file and creates `Person`s, `Tag`s, and `PLACEHOLDER` `Document`s in one shot. Only one import can run at a time (`IMPORT_ALREADY_RUNNING` error if attempted concurrently). **Canonical import** — an asynchronous batch process (`CanonicalImportOrchestrator`) that consumes the normalizer's committed canonical artifacts and creates `Tag`s, `Person`s (register + tree), family relationships, and `Document`s. Four idempotent loaders run in a fixed dependency order — `TagTreeImporter``PersonRegisterImporter``PersonTreeImporter``DocumentImporter` — each calling the owning domain's service. Re-running it never duplicates rows (upsert by `source_ref` / document `index`) and never overwrites a human-edited field. Only one import can run at a time (`IMPORT_ALREADY_RUNNING` error if attempted concurrently); a missing or malformed artifact fails closed (`IMPORT_ARTIFACT_INVALID`). Replaced the legacy raw-spreadsheet `MassImportService` (see ADR-025).
**SkippedFile** (`MassImportService.SkippedFile`) — a file that was presented for import but not processed, recorded with a `filename` and a `reason` code. Possible reasons: `INVALID_PDF_SIGNATURE` (magic-byte validation failed), `S3_UPLOAD_FAILED` (file upload to MinIO/S3 threw an exception), `FILE_READ_ERROR` (the file could not be opened for reading), or `ALREADY_EXISTS` (a document with the same filename already exists in the archive with a status other than `PLACEHOLDER`). **canonical artifact** — one of the four files the normalizer (`tools/import-normalizer/`) emits and commits to `tools/import-normalizer/out/`: `canonical-tag-tree.xlsx`, `canonical-persons.xlsx`, `canonical-persons-tree.json`, `canonical-documents.xlsx`. They are the contract the backend importer reads (mapped by header name); the semantic transformation (German-date parsing, name classification) lives only in the normalizer, never in Java.
**CanonicalSheetReader** — the value-level POI helper that opens a canonical `.xlsx`, maps the header row to column indices by name (replacing the brittle positional column config), splits pipe-delimited list columns, and throws `IMPORT_ARTIFACT_INVALID` on a missing required header rather than NPE-ing on a null index.
**SkippedFile** (`ImportStatus.SkippedFile`) — a file that was presented for import but not processed, recorded with a `filename` and a `reason` code. Possible reasons: `INVALID_FILENAME_PATH_TRAVERSAL` (the file-column basename failed the path-traversal guard), `INVALID_PDF_SIGNATURE` (magic-byte validation failed), `S3_UPLOAD_FAILED` (file upload to MinIO/S3 threw an exception), `FILE_READ_ERROR` (the file could not be opened for reading), or `ALREADY_EXISTS` (a document with the same `index` already exists in the archive with a status other than `PLACEHOLDER`).
**skipped count** — the total number of `SkippedFile` entries accumulated during a single import run (`ImportStatus.skipped()`). Shown in the amber warning section of the Import Status Card in the admin UI; a value of zero suppresses the section entirely. **skipped count** — the total number of `SkippedFile` entries accumulated during a single import run (`ImportStatus.skipped()`). Shown in the amber warning section of the Import Status Card in the admin UI; a value of zero suppresses the section entirely.

View File

@@ -2,7 +2,7 @@
**Date:** 2026-05-27 **Date:** 2026-05-27
**Status:** Accepted **Status:** Accepted
**Issue:** #671 **Issue:** #671 (schema, decisions 12); #669 (importer architecture, decision 3)
**Milestone:** Handling the Unknowns — honest uncertainty in dates & people **Milestone:** Handling the Unknowns — honest uncertainty in dates & people
--- ---
@@ -56,6 +56,59 @@ The importer reads the Python normalizer's canonical output
output strings are persisted as-is. The same applies to `source_ref`, which carries the output strings are persisted as-is. The same applies to `source_ref`, which carries the
normalizer's `person_id` / canonical `tag_path` unchanged as the re-import idempotency key. normalizer's `person_id` / canonical `tag_path` unchanged as the re-import idempotency key.
### 3. The importer is four idempotent loaders over the canonical artifacts; Java no longer parses the raw spreadsheet (Phase 3, #669)
The legacy `MassImportService` read the *raw* original spreadsheet by positional column
index (`@Value app.import.col.*`) and re-derived everything in Java (ISO-only date parsing,
name classification via `findOrCreateByAlias`, an ODS/XXE XML path). It is **deleted**.
The rebuild is a `CanonicalImportOrchestrator` driving four single-responsibility loaders in
an explicit dependency DAG — `TagTreeImporter``PersonRegisterImporter`
`PersonTreeImporter``DocumentImporter` — that **consume the committed canonical artifacts**
(`tools/import-normalizer/out/`). A shared `CanonicalSheetReader` maps columns **by header
name** (not by index) and fails closed (`IMPORT_ARTIFACT_INVALID`) on a missing header. Each
loader calls the **owning domain's service**, never a repository (layering rule); the tree
loader uses `RelationshipService`, never the relationship repository.
Settled sub-decisions:
- **Idempotency precedence is domain-specific.** Persons/tags upsert by `source_ref`,
documents by `index`. Two distinct rules apply:
- **Person/Tag scalar fields = preserve human edits.** On re-import a non-blank field a human
changed in-app is never overwritten (blank fields are filled from canonical via the single
`preferHuman` idiom), and `provisional` is monotonic-downward — once a human confirms a
person (`false`) it never reverts to `true`. Because the orchestrator loads the register and
tree *before* documents, a person already `false` can never be flipped provisional by a
later document row that references the same `source_ref`, regardless of document-row order.
- **Document sender/receivers/tags = canonical-authoritative.** A document's sender, receiver
set, and tag set are owned by the canonical row, not the archivist. On re-import of a
PLACEHOLDER document `DocumentImporter` clears and re-populates `receivers`/`tags` so a row
whose set *shrinks* prunes the removed links rather than accumulating stale ones. The
"preserve human edits" rule above does **not** extend to these collections. The raw
`sender_text`/`receiver_text` cells are always retained verbatim (a separate invariant).
Note non-PLACEHOLDER documents are skipped entirely (`ALREADY_EXISTS`), so once a document
has a file the importer never touches it again — this bounds the authoritative-overwrite
blast radius to placeholder rows.
Verified against real Postgres in `CanonicalImportIntegrationTest`
(`reimport_preservesHumanEditedPersonField`, `reimport_prunesRemovedReceiverAndTag…`,
`import_neverFlipsRegisterPersonToProvisional…`).
- **Name policy = Option A.** The normalizer resolved attribution upstream: the document sheet
carries the resolved slug in `sender_person_id` / `receiver_person_ids` and the raw cell in
`sender_name` / `receiver_names`. The importer routes register-first by `source_ref`
(provisional `Person` when a slug is unmatched), and **always retains the raw cell** in
`sender_text` / `receiver_text` even when a person is linked — the load-bearing invariant
behind the merge story. A row with no slug but raw text (prose / `?` / object-noise) links
no person and keeps only the raw text.
- **`provisional` is now populated.** Importer-minted persons are `provisional = true`;
register and tree persons stay `false`. This is the Phase-3 contract the schema (decision 1)
left at default-`false`.
- **Security guards are defense-in-depth, not upstream-trust.** The `file` column is treated as
hostile (CWE-22 does not care it came from our tool): its basename is validated
(`isValidImportFilename` — slash/backslash, three Unicode slash homoglyphs, `..`, null byte,
absolute path) and resolved only inside the import dir with canonical-path containment, so a
traversal value can never escape. The `%PDF` magic-byte check gates upload. These guards and
their tests were ported from `MassImportService` **before** it was deleted.
--- ---
## Consequences ## Consequences
@@ -76,6 +129,20 @@ normalizer's `person_id` / canonical `tag_path` unchanged as the re-import idemp
- **Forward-only.** The migration is immutable once shipped (Flyway checksum model); any fix - **Forward-only.** The migration is immutable once shipped (Flyway checksum model); any fix
goes in a later version. There is no down-migration — rollback means restoring from the goes in a later version. There is no down-migration — rollback means restoring from the
nightly `pg_dump`, the standard procedure. nightly `pg_dump`, the standard procedure.
- **`runImport()` is non-transactional — per-loader transactions only.** The orchestrator
does not wrap the four loaders in a single transaction; each loader (or the per-call
`upsertBySourceRef` / `DocumentImporter.load`) carries its own `@Transactional` boundary. A
partial failure mid-run (e.g. the document loader throws after tags + persons committed)
leaves the earlier loaders' data committed and the `ImportStatus` set to `FAILED`. This is
acceptable precisely because the import is idempotent: re-running is safe and converges to
the same state, so the operational recovery for a partial failure is simply to fix the
offending artifact and re-trigger the import — no manual cleanup of half-written data is
required. A future maintainer must not assume all-or-nothing semantics.
- **Path-escape aborts the whole import (fail-closed), by design.** A path-traversal or
symlink-escape in a row's file path is treated as an attack signal: the import aborts rather
than recording the row as a `SkippedFile` and continuing. This is a deliberate owner decision
(2026-05-27) over a per-file skip — a malicious path must surface loudly, not be silently
tolerated.
- **`PersonSummaryDTO` coupling.** `provisional` was added to the `PersonSummaryDTO` native - **`PersonSummaryDTO` coupling.** `provisional` was added to the `PersonSummaryDTO` native
interface projection; because the projection is backed by native SQL, the column had to be interface projection; because the projection is backed by native SQL, the column had to be
added to all three native `SELECT`s (`findAllWithDocumentCount`, `searchWithDocumentCount`, added to all three native `SELECT`s (`findAllWithDocumentCount`, `searchWithDocumentCount`,

View File

@@ -1,7 +1,7 @@
@startuml @startuml
!include <C4/C4_Component> !include <C4/C4_Component>
title Component Diagram: API Backend — Document Management & Import title Component Diagram: API Backend — Document Management & Canonical Import
Container(frontend, "Web Frontend", "SvelteKit") Container(frontend, "Web Frontend", "SvelteKit")
ContainerDb(db, "PostgreSQL", "PostgreSQL 16") ContainerDb(db, "PostgreSQL", "PostgreSQL 16")
@@ -9,30 +9,48 @@ ContainerDb(minio, "Object Storage", "MinIO (S3-compatible)")
System_Boundary(backend, "API Backend (Spring Boot)") { System_Boundary(backend, "API Backend (Spring Boot)") {
Component(docCtrl, "DocumentController", "Spring MVC — /api/documents", "CRUD for documents: search, get by ID, update metadata, upload/download file, conversation thread, batch metadata updates, and per-month density aggregation for the timeline filter widget.") Component(docCtrl, "DocumentController", "Spring MVC — /api/documents", "CRUD for documents: search, get by ID, update metadata, upload/download file, conversation thread, batch metadata updates, and per-month density aggregation for the timeline filter widget.")
Component(adminCtrl, "AdminController", "Spring MVC — /api/admin", "Triggers asynchronous Excel/ODS mass import (requires ADMIN permission). Reports import state (IDLE/RUNNING/DONE/FAILED).") Component(adminCtrl, "AdminController", "Spring MVC — /api/admin", "Triggers the asynchronous canonical import (requires ADMIN permission). Reports import state (IDLE/RUNNING/DONE/FAILED).")
Component(docSvc, "DocumentService", "Spring Service", "Core document business logic: store, update, search. Resolves persons and tags, delegates file I/O to FileService, builds dynamic JPA Specifications, and integrates with audit logging.") Component(docSvc, "DocumentService", "Spring Service", "Core document business logic: store, update, search. Resolves persons and tags, delegates file I/O to FileService, builds dynamic JPA Specifications, and integrates with audit logging.")
Component(fileSvc, "FileService", "Spring Service", "Wraps AWS SDK v2 S3Client. Uploads files with UUID-keyed paths, computes SHA-256 hash, downloads with content-type detection, and generates presigned URLs for OCR access.") Component(fileSvc, "FileService", "Spring Service", "Wraps AWS SDK v2 S3Client. Uploads files with UUID-keyed paths, computes SHA-256 hash, downloads with content-type detection, and generates presigned URLs for OCR access.")
Component(massImport, "MassImportService", "Spring Service — @Async", "Reads Excel/ODS files from /import mount. Tracks import state (IDLE/RUNNING/DONE/FAILED) and delegates to ExcelService. Returns immediately; processing runs asynchronously.") Component(importOrch, "CanonicalImportOrchestrator", "Spring Service — @Async", "Runs the four canonical loaders in an explicit dependency DAG (TagTree → PersonRegister → PersonTree → Document). Smoke-checks all four artifacts before starting, owns the IDLE/RUNNING/DONE/FAILED state machine, fails closed on a malformed artifact.")
Component(excelSvc, "ExcelService", "Spring Service", "Parses Excel/ODS workbooks (Apache POI). Column indices configurable via application.properties. Creates/updates document records per row.") Component(tagTreeLoader, "TagTreeImporter", "Spring Component", "Upserts the tag hierarchy from canonical-tag-tree.xlsx via TagService (by canonical tag_path).")
Component(personRegLoader, "PersonRegisterImporter", "Spring Component", "Upserts register persons from canonical-persons.xlsx via PersonService (by normalizer person_id).")
Component(personTreeLoader, "PersonTreeImporter", "Spring Component", "Upserts tree persons + relationships from canonical-persons-tree.json via PersonService and RelationshipService.")
Component(docLoader, "DocumentImporter", "Spring Component", "Loads canonical-documents.xlsx: routes attribution register-first (raw cell always retained in sender_text/receiver_text), parses clean dates, keeps the S3 upload + thumbnail plumbing, and ports the path-traversal / homoglyph / absolute-path / %PDF magic-byte security guards.")
Component(sheetReader, "CanonicalSheetReader", "POI helper", "Maps a canonical .xlsx by header name (no positional indices), splits pipe-delimited list columns, fails closed (IMPORT_ARTIFACT_INVALID) on a missing required header.")
Component(minioConf, "MinioConfig", "Spring @Configuration", "Creates the S3Client and S3Presigner beans with path-style access for MinIO. Validates MinIO connectivity on startup.") Component(minioConf, "MinioConfig", "Spring @Configuration", "Creates the S3Client and S3Presigner beans with path-style access for MinIO. Validates MinIO connectivity on startup.")
Component(docRepo, "DocumentRepository", "Spring Data JPA", "Queries documents with Specification-based dynamic search, bidirectional conversation thread queries, full-text search with ranking and match highlighting, and transcription pipeline queue projections.") Component(docRepo, "DocumentRepository", "Spring Data JPA", "Queries documents with Specification-based dynamic search, bidirectional conversation thread queries, full-text search with ranking and match highlighting, and transcription pipeline queue projections.")
Component(docSpec, "DocumentSpecifications", "JPA Criteria API", "Factory for composable predicates: hasText (full-text), hasSender, hasReceiver, isBetween (date range), hasTags (subquery AND/OR logic).") Component(docSpec, "DocumentSpecifications", "JPA Criteria API", "Factory for composable predicates: hasText (full-text), hasSender, hasReceiver, isBetween (date range), hasTags (subquery AND/OR logic).")
} }
Component(personSvc, "PersonService", "Spring Service", "See diagram 3e. Called by DocumentService to resolve sender / receiver persons by ID.") Component(personSvc, "PersonService", "Spring Service", "See diagram 3e. Resolves sender / receiver persons by ID; upserts persons by source_ref for the importer.")
Component(tagSvc, "TagService", "Spring Service", "See diagram 3d. Called by DocumentService to find or create tags by name.") Component(tagSvc, "TagService", "Spring Service", "See diagram 3d. Finds or creates tags by name; upserts tags by source_ref for the importer.")
Component(relSvc, "RelationshipService", "Spring Service", "See diagram 3e. Creates family relationships from the person tree during import.")
Rel(frontend, docCtrl, "Document requests", "HTTP / JSON") Rel(frontend, docCtrl, "Document requests", "HTTP / JSON")
Rel(frontend, adminCtrl, "Trigger import", "HTTP / JSON") Rel(frontend, adminCtrl, "Trigger import", "HTTP / JSON")
Rel(docCtrl, docSvc, "Delegates to") Rel(docCtrl, docSvc, "Delegates to")
Rel(adminCtrl, massImport, "Triggers") Rel(adminCtrl, importOrch, "Triggers")
Rel(docSvc, fileSvc, "Upload / download files") Rel(docSvc, fileSvc, "Upload / download files")
Rel(docSvc, docRepo, "Reads / writes documents") Rel(docSvc, docRepo, "Reads / writes documents")
Rel(docSvc, docSpec, "Builds search predicates") Rel(docSvc, docSpec, "Builds search predicates")
Rel(docSvc, personSvc, "Resolves sender / receivers") Rel(docSvc, personSvc, "Resolves sender / receivers")
Rel(docSvc, tagSvc, "Finds or creates tags") Rel(docSvc, tagSvc, "Finds or creates tags")
Rel(massImport, excelSvc, "Parses Excel/ODS file") Rel(importOrch, tagTreeLoader, "1. Loads tags")
Rel(excelSvc, docSvc, "Creates / updates documents") Rel(importOrch, personRegLoader, "2. Loads register persons")
Rel(importOrch, personTreeLoader, "3. Loads tree persons + relationships")
Rel(importOrch, docLoader, "4. Loads documents")
Rel(tagTreeLoader, sheetReader, "Reads canonical .xlsx")
Rel(personRegLoader, sheetReader, "Reads canonical .xlsx")
Rel(docLoader, sheetReader, "Reads canonical .xlsx")
Rel(tagTreeLoader, tagSvc, "Upserts tags by source_ref")
Rel(personRegLoader, personSvc, "Upserts persons by source_ref")
Rel(personTreeLoader, personSvc, "Upserts persons by source_ref")
Rel(personTreeLoader, relSvc, "Creates relationships")
Rel(docLoader, docSvc, "Upserts documents by index")
Rel(docLoader, personSvc, "Register-first match / provisional person")
Rel(docLoader, tagSvc, "Attaches tag by source_ref")
Rel(docLoader, fileSvc, "Uploads resolved file")
Rel(minioConf, fileSvc, "Provides S3Client and S3Presigner beans") Rel(minioConf, fileSvc, "Provides S3Client and S3Presigner beans")
Rel(fileSvc, minio, "PUT / GET / presigned URL objects", "S3 API / HTTP") Rel(fileSvc, minio, "PUT / GET / presigned URL objects", "S3 API / HTTP")
Rel(docRepo, db, "SQL queries", "JDBC") Rel(docRepo, db, "SQL queries", "JDBC")

View File

@@ -14,6 +14,7 @@
"error_file_too_large": "Die Datei ist zu groß (max. 50 MB).", "error_file_too_large": "Die Datei ist zu groß (max. 50 MB).",
"error_user_not_found": "Der Benutzer wurde nicht gefunden.", "error_user_not_found": "Der Benutzer wurde nicht gefunden.",
"error_import_already_running": "Ein Import läuft bereits. Bitte warten Sie, bis dieser abgeschlossen ist.", "error_import_already_running": "Ein Import läuft bereits. Bitte warten Sie, bis dieser abgeschlossen ist.",
"error_import_artifact_invalid": "Eine Importdatei fehlt oder ist ungültig. Bitte führen Sie den Normalizer erneut aus.",
"error_invalid_credentials": "E-Mail-Adresse oder Passwort ist falsch.", "error_invalid_credentials": "E-Mail-Adresse oder Passwort ist falsch.",
"error_session_expired": "Ihre Sitzung ist abgelaufen. Bitte melden Sie sich erneut an.", "error_session_expired": "Ihre Sitzung ist abgelaufen. Bitte melden Sie sich erneut an.",
"error_session_expired_explainer": "Aus Sicherheitsgründen werden Sitzungen nach 8 Stunden Inaktivität automatisch beendet.", "error_session_expired_explainer": "Aus Sicherheitsgründen werden Sitzungen nach 8 Stunden Inaktivität automatisch beendet.",
@@ -356,11 +357,13 @@
"admin_system_import_status_done_label": "Dokumente verarbeitet", "admin_system_import_status_done_label": "Dokumente verarbeitet",
"admin_system_import_skipped_label": "übersprungen", "admin_system_import_skipped_label": "übersprungen",
"import_reason_invalid_pdf_signature": "Keine gültige PDF-Signatur", "import_reason_invalid_pdf_signature": "Keine gültige PDF-Signatur",
"import_reason_path_traversal": "Ungültiger Dateiname (Pfad)",
"import_reason_file_read_error": "Fehler beim Lesen der Datei", "import_reason_file_read_error": "Fehler beim Lesen der Datei",
"import_reason_s3_upload_failed": "Upload-Fehler (S3)", "import_reason_s3_upload_failed": "Upload-Fehler (S3)",
"import_reason_already_exists": "Bereits importiert", "import_reason_already_exists": "Bereits importiert",
"admin_system_import_status_failed": "Import fehlgeschlagen", "admin_system_import_status_failed": "Import fehlgeschlagen",
"admin_system_import_failed_no_spreadsheet": "Keine Tabellendatei gefunden.", "admin_system_import_failed_no_spreadsheet": "Keine Tabellendatei gefunden.",
"admin_system_import_failed_artifact": "Eine Importdatei fehlt oder ist ungültig.",
"admin_system_import_failed_internal": "Interner Fehler beim Import.", "admin_system_import_failed_internal": "Interner Fehler beim Import.",
"admin_system_thumbnails_heading": "Thumbnails erzeugen", "admin_system_thumbnails_heading": "Thumbnails erzeugen",
"admin_system_thumbnails_description": "Erzeugt Vorschaubilder für Dokumente ohne Thumbnail (z. B. nach dem Massenimport).", "admin_system_thumbnails_description": "Erzeugt Vorschaubilder für Dokumente ohne Thumbnail (z. B. nach dem Massenimport).",

View File

@@ -14,6 +14,7 @@
"error_file_too_large": "The file is too large (max. 50 MB).", "error_file_too_large": "The file is too large (max. 50 MB).",
"error_user_not_found": "User not found.", "error_user_not_found": "User not found.",
"error_import_already_running": "An import is already running. Please wait for it to finish.", "error_import_already_running": "An import is already running. Please wait for it to finish.",
"error_import_artifact_invalid": "A canonical import file is missing or invalid. Please re-run the normalizer.",
"error_invalid_credentials": "Email address or password is incorrect.", "error_invalid_credentials": "Email address or password is incorrect.",
"error_session_expired": "Your session has expired. Please sign in again.", "error_session_expired": "Your session has expired. Please sign in again.",
"error_session_expired_explainer": "For security reasons, sessions are automatically ended after 8 hours of inactivity.", "error_session_expired_explainer": "For security reasons, sessions are automatically ended after 8 hours of inactivity.",
@@ -356,11 +357,13 @@
"admin_system_import_status_done_label": "Documents processed", "admin_system_import_status_done_label": "Documents processed",
"admin_system_import_skipped_label": "skipped", "admin_system_import_skipped_label": "skipped",
"import_reason_invalid_pdf_signature": "Invalid PDF signature", "import_reason_invalid_pdf_signature": "Invalid PDF signature",
"import_reason_path_traversal": "Invalid filename (path)",
"import_reason_file_read_error": "File read error", "import_reason_file_read_error": "File read error",
"import_reason_s3_upload_failed": "Upload error (S3)", "import_reason_s3_upload_failed": "Upload error (S3)",
"import_reason_already_exists": "Already imported", "import_reason_already_exists": "Already imported",
"admin_system_import_status_failed": "Import failed", "admin_system_import_status_failed": "Import failed",
"admin_system_import_failed_no_spreadsheet": "No spreadsheet file found.", "admin_system_import_failed_no_spreadsheet": "No spreadsheet file found.",
"admin_system_import_failed_artifact": "A canonical import file is missing or invalid.",
"admin_system_import_failed_internal": "Import failed due to an internal error.", "admin_system_import_failed_internal": "Import failed due to an internal error.",
"admin_system_thumbnails_heading": "Generate thumbnails", "admin_system_thumbnails_heading": "Generate thumbnails",
"admin_system_thumbnails_description": "Generates preview images for documents without a thumbnail (e.g. after the mass import).", "admin_system_thumbnails_description": "Generates preview images for documents without a thumbnail (e.g. after the mass import).",

View File

@@ -14,6 +14,7 @@
"error_file_too_large": "El archivo es demasiado grande (máx. 50 MB).", "error_file_too_large": "El archivo es demasiado grande (máx. 50 MB).",
"error_user_not_found": "Usuario no encontrado.", "error_user_not_found": "Usuario no encontrado.",
"error_import_already_running": "Ya hay una importación en curso. Por favor, espere a que finalice.", "error_import_already_running": "Ya hay una importación en curso. Por favor, espere a que finalice.",
"error_import_artifact_invalid": "Falta un archivo de importación canónico o no es válido. Vuelva a ejecutar el normalizador.",
"error_invalid_credentials": "El correo electrónico o la contraseña son incorrectos.", "error_invalid_credentials": "El correo electrónico o la contraseña son incorrectos.",
"error_session_expired": "Su sesión ha expirado. Por favor, inicie sesión de nuevo.", "error_session_expired": "Su sesión ha expirado. Por favor, inicie sesión de nuevo.",
"error_session_expired_explainer": "Por razones de seguridad, las sesiones se terminan automáticamente tras 8 horas de inactividad.", "error_session_expired_explainer": "Por razones de seguridad, las sesiones se terminan automáticamente tras 8 horas de inactividad.",
@@ -356,11 +357,13 @@
"admin_system_import_status_done_label": "Documentos procesados", "admin_system_import_status_done_label": "Documentos procesados",
"admin_system_import_skipped_label": "omitidos", "admin_system_import_skipped_label": "omitidos",
"import_reason_invalid_pdf_signature": "Firma PDF no válida", "import_reason_invalid_pdf_signature": "Firma PDF no válida",
"import_reason_path_traversal": "Nombre de archivo no válido (ruta)",
"import_reason_file_read_error": "Error al leer el archivo", "import_reason_file_read_error": "Error al leer el archivo",
"import_reason_s3_upload_failed": "Error de carga (S3)", "import_reason_s3_upload_failed": "Error de carga (S3)",
"import_reason_already_exists": "Ya importado", "import_reason_already_exists": "Ya importado",
"admin_system_import_status_failed": "Importación fallida", "admin_system_import_status_failed": "Importación fallida",
"admin_system_import_failed_no_spreadsheet": "No se encontró ninguna hoja de cálculo.", "admin_system_import_failed_no_spreadsheet": "No se encontró ninguna hoja de cálculo.",
"admin_system_import_failed_artifact": "Falta un archivo de importación canónico o no es válido.",
"admin_system_import_failed_internal": "Error interno durante la importación.", "admin_system_import_failed_internal": "Error interno durante la importación.",
"admin_system_thumbnails_heading": "Generar miniaturas", "admin_system_thumbnails_heading": "Generar miniaturas",
"admin_system_thumbnails_description": "Genera imágenes de vista previa para documentos sin miniatura (p. ej. tras la importación masiva).", "admin_system_thumbnails_description": "Genera imágenes de vista previa para documentos sin miniatura (p. ej. tras la importación masiva).",

View File

@@ -17,6 +17,7 @@ export type ErrorCode =
| 'EMAIL_ALREADY_IN_USE' | 'EMAIL_ALREADY_IN_USE'
| 'WRONG_CURRENT_PASSWORD' | 'WRONG_CURRENT_PASSWORD'
| 'IMPORT_ALREADY_RUNNING' | 'IMPORT_ALREADY_RUNNING'
| 'IMPORT_ARTIFACT_INVALID'
| 'INVALID_RESET_TOKEN' | 'INVALID_RESET_TOKEN'
| 'INVITE_NOT_FOUND' | 'INVITE_NOT_FOUND'
| 'INVITE_EXHAUSTED' | 'INVITE_EXHAUSTED'
@@ -104,6 +105,8 @@ export function getErrorMessage(code: ErrorCode | string | undefined): string {
return m.error_wrong_current_password(); return m.error_wrong_current_password();
case 'IMPORT_ALREADY_RUNNING': case 'IMPORT_ALREADY_RUNNING':
return m.error_import_already_running(); return m.error_import_already_running();
case 'IMPORT_ARTIFACT_INVALID':
return m.error_import_artifact_invalid();
case 'INVALID_RESET_TOKEN': case 'INVALID_RESET_TOKEN':
return m.error_invalid_reset_token(); return m.error_invalid_reset_token();
case 'INVITE_NOT_FOUND': case 'INVITE_NOT_FOUND':

View File

@@ -13,10 +13,13 @@ let {
const failureMessage = $derived( const failureMessage = $derived(
importStatus?.statusCode === 'IMPORT_FAILED_NO_SPREADSHEET' importStatus?.statusCode === 'IMPORT_FAILED_NO_SPREADSHEET'
? m.admin_system_import_failed_no_spreadsheet() ? m.admin_system_import_failed_no_spreadsheet()
: m.admin_system_import_failed_internal() : importStatus?.statusCode === 'IMPORT_FAILED_ARTIFACT'
? m.admin_system_import_failed_artifact()
: m.admin_system_import_failed_internal()
); );
function reasonLabel(code: string): string { function reasonLabel(code: string): string {
if (code === 'INVALID_FILENAME_PATH_TRAVERSAL') return m.import_reason_path_traversal();
if (code === 'INVALID_PDF_SIGNATURE') return m.import_reason_invalid_pdf_signature(); if (code === 'INVALID_PDF_SIGNATURE') return m.import_reason_invalid_pdf_signature();
if (code === 'FILE_READ_ERROR') return m.import_reason_file_read_error(); if (code === 'FILE_READ_ERROR') return m.import_reason_file_read_error();
if (code === 'S3_UPLOAD_FAILED') return m.import_reason_s3_upload_failed(); if (code === 'S3_UPLOAD_FAILED') return m.import_reason_s3_upload_failed();