cells = new ArrayList<>(width);
+ for (int c = 0; c < width; c++) {
+ cells.add(cellToString(poiRow.getCell(c)));
+ }
+ return cells;
+ }
+
+    /**
+     * Renders a single spreadsheet cell as text.
+     *
+     * <p>String cells are returned verbatim and boolean cells via
+     * {@link String#valueOf(boolean)}. A date-formatted numeric cell becomes the ISO
+     * {@code yyyy-MM-dd} form of its local date. Any other numeric cell is rendered as a
+     * plain integer when it holds a whole number, and in full otherwise. A {@code null}
+     * cell and every cell type not listed in the switch yield the empty string.
+     *
+     * @param cell the cell to render; may be {@code null}
+     * @return the textual value of the cell
+     */
+    private static String cellToString(Cell cell) {
+        if (cell == null) return "";
+        return switch (cell.getCellType()) {
+            case STRING -> cell.getStringCellValue();
+            case NUMERIC -> {
+                if (DateUtil.isCellDateFormatted(cell)) {
+                    yield cell.getLocalDateTimeCellValue().toLocalDate().toString();
+                }
+                double value = cell.getNumericCellValue();
+                // Whole numbers keep the integer rendering ("42", not "42.0"). A fractional
+                // value is emitted in full rather than being silently truncated by the cast.
+                boolean whole = !Double.isInfinite(value) && value == Math.rint(value);
+                yield whole ? String.valueOf((long) value) : String.valueOf(value);
+            }
+            case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
+            default -> "";
+        };
+    }
+}
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentImporter.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentImporter.java
new file mode 100644
index 00000000..b85a8cc6
--- /dev/null
+++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentImporter.java
@@ -0,0 +1,391 @@
+package org.raddatz.familienarchiv.importing;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.raddatz.familienarchiv.document.DatePrecision;
+import org.raddatz.familienarchiv.document.Document;
+import org.raddatz.familienarchiv.document.DocumentService;
+import org.raddatz.familienarchiv.document.DocumentStatus;
+import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
+import org.raddatz.familienarchiv.exception.DomainException;
+import org.raddatz.familienarchiv.exception.ErrorCode;
+import org.raddatz.familienarchiv.person.Person;
+import org.raddatz.familienarchiv.person.PersonService;
+import org.raddatz.familienarchiv.person.PersonType;
+import org.raddatz.familienarchiv.person.PersonUpsertCommand;
+import org.raddatz.familienarchiv.tag.Tag;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Component;
+import org.springframework.transaction.annotation.Transactional;
+import software.amazon.awssdk.core.sync.RequestBody;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.PutObjectRequest;
+
+import org.raddatz.familienarchiv.tag.TagService;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.time.LocalDate;
+import java.time.format.DateTimeParseException;
+import java.util.ArrayList;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+import java.util.UUID;
+import java.util.regex.Pattern;
+
+/**
+ * Loads {@code canonical-documents.xlsx} into the document domain. Java performs no
+ * semantic transformation: the normalizer already resolved people to slugs and dates to
+ * ISO values. This loader maps columns by header name, routes each attribution
+ * register-first (always retaining the raw cell in {@code sender_text}/{@code receiver_text}),
+ * parses clean dates, and keeps the S3/thumbnail plumbing.
+ *
+ * The import corpus is uniform — every PDF is named {@code <index>.pdf} flat in the import
+ * dir — so a document's PDF is resolved directly by its index:
+ * {@code importDir.resolve(index + ".pdf")}. The {@code index} is still hostile input
+ * regardless of upstream trust (CWE-22 does not care it came from our Python tool): it is
+ * validated against a strict catalog pattern with {@link #isValidImportIndex} (no path
+ * separators, no {@code .}/{@code ..}, no absolute path, no slash homoglyphs) and the
+ * resolved path is asserted to stay inside the import dir in {@link #resolvePdfByIndex} as
+ * defense-in-depth. The {@code %PDF} magic-byte check still gates upload.
+ */
+@Component
+@RequiredArgsConstructor
+@Slf4j
+public class DocumentImporter {
+
+    /** Column headers that {@link #load} passes to the sheet reader as required. */
+    static final List<String> REQUIRED_HEADERS = List.of(
+            "index", "sender_person_id", "sender_name",
+            "receiver_person_ids", "receiver_names", "date_iso", "date_raw", "date_precision");
+
+ // Catalog index shape: 1–4 letters (ASCII + Latin-1 letters, e.g. the German "ü" in
+ // "Mü-0001"), one or more hyphens (the corpus has a few "C--0029" data-entry artefacts),
+ // digits, and an optional trailing "x" the normalizer recognises. Anchored, with no
+ // separator / dot / slash characters in the class, so "<index>.pdf" can never traverse.
+ // NOTE: `\d` here is intentionally ASCII-only ([0-9]). Java's java.util.regex matches `\d`
+ // against [0-9] unless Pattern.UNICODE_CHARACTER_CLASS is set — do NOT add that flag, or
+ // Arabic-Indic / fullwidth digits would silently widen the accepted set.
+ private static final Pattern INDEX_PATTERN =
+ Pattern.compile("[A-Za-z\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u00FF]{1,4}-+\\d+x?");
+
+ // Collaborators. They are final, so the constructor generated by the class-level
+ // @RequiredArgsConstructor receives all six.
+ private final DocumentService documentService;
+ private final PersonService personService;
+ private final TagService tagService;
+ private final S3Client s3Client;
+ private final ThumbnailAsyncRunner thumbnailAsyncRunner;
+ private final FileStreamOpener fileStreamOpener;
+
+ // Bucket that uploadToS3 writes to; defaults to "familienarchiv" when app.s3.bucket is unset.
+ @Value("${app.s3.bucket:familienarchiv}")
+ private String bucketName;
+
+ // Directory that resolvePdfByIndex looks in; defaults to "/import" when app.import.dir is unset.
+ @Value("${app.import.dir:/import}")
+ private String importDir;
+
+    /**
+     * Outcome of loading the document sheet.
+     *
+     * @param processed    number of rows that were persisted
+     * @param skippedFiles one entry (index + reason) per row that was skipped
+     */
+    public record LoadResult(int processed, List<ImportStatus.SkippedFile> skippedFiles) {}
+
+ // One transaction for the whole sheet keeps the Hibernate session open so an existing
+ // document's lazy receivers collection initialises during an idempotent re-import.
+ // Invoked cross-bean from the orchestrator, so the @Transactional proxy applies.
+    /**
+     * Imports every data row of the canonical document sheet.
+     *
+     * <p>Rows whose {@code index} cell is blank are ignored entirely: they are neither
+     * counted as processed nor reported as skipped.
+     *
+     * @param artifact the canonical workbook, read together with {@link #REQUIRED_HEADERS}
+     * @return the number of persisted rows plus one entry per skipped row
+     */
+    @Transactional
+    public LoadResult load(File artifact) {
+        List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(artifact, REQUIRED_HEADERS);
+        int processed = 0;
+        List<ImportStatus.SkippedFile> skipped = new ArrayList<>();
+        // The spreadsheet header is row 1, so the counter is bumped before use and the first
+        // data row is reported as row 2 — the number an operator sees in the .xlsx.
+        int rowNumber = 1;
+        for (CanonicalSheetReader.Row row : rows) {
+            rowNumber++;
+            String index = row.get("index");
+            if (index.isBlank()) continue;
+            Optional<ImportStatus.SkipReason> skipReason = importRow(row, index, rowNumber);
+            if (skipReason.isPresent()) {
+                skipped.add(new ImportStatus.SkippedFile(index, skipReason.get()));
+            } else {
+                processed++;
+            }
+        }
+        log.info("Imported {} documents from {} ({} skipped)", processed, artifact.getName(), skipped.size());
+        return new LoadResult(processed, skipped);
+    }
+
+    /**
+     * Validates one row's index, locates its PDF, and hands the row to {@link #persist}.
+     *
+     * @param row       the canonical sheet row
+     * @param index     the row's {@code index} cell (the caller has already rejected blanks)
+     * @param rowNumber the 1-based spreadsheet row, used only for log breadcrumbs
+     * @return the reason the row was skipped, or empty when it was persisted
+     */
+    private Optional<ImportStatus.SkipReason> importRow(CanonicalSheetReader.Row row, String index, int rowNumber) {
+        if (!isValidImportIndex(index)) {
+            // Breadcrumb is the source row number, NOT the raw (possibly-hostile) index — an
+            // operator can find the offending row in the .xlsx without us echoing
+            // attacker-controlled input into the log.
+            log.warn("Skipping import row {}: index rejected (fails catalog-shape validation)", rowNumber);
+            return Optional.of(ImportStatus.SkipReason.INVALID_FILENAME_PATH_TRAVERSAL);
+        }
+        Optional<File> resolved = resolvePdfByIndex(index, rowNumber);
+        if (resolved.isEmpty()) {
+            // Distinct from the "index rejected" skip above: the index is VALID but no
+            // <index>.pdf is on disk, so the row becomes a normal PLACEHOLDER (not skipped).
+            // The index passed validation, so it is safe to log here — this surfaces a corpus
+            // that drifts from the "<index>.pdf" assumption rather than dropping it silently.
+            log.info("Import row {}: index {} is valid but {}.pdf is absent — creating PLACEHOLDER",
+                    rowNumber, index, index);
+        } else {
+            try {
+                if (!isPdfMagicBytes(resolved.get())) {
+                    return Optional.of(ImportStatus.SkipReason.INVALID_PDF_SIGNATURE);
+                }
+            } catch (IOException e) {
+                // Report the row number under the "row" label (as the other breadcrumbs do)
+                // and add the validated index for convenience.
+                log.error("Magic-byte check failed for row {} (index {})", rowNumber, index, e);
+                return Optional.of(ImportStatus.SkipReason.FILE_READ_ERROR);
+            }
+        }
+        return persist(row, index, resolved);
+    }
+
+    /**
+     * Uploads the row's PDF (when there is one) and saves the document.
+     *
+     * <p>An existing document is only overwritten while it is still a PLACEHOLDER; any other
+     * status is reported as {@code ALREADY_EXISTS}. Without a file the document is saved as
+     * a PLACEHOLDER with no S3 key or content type.
+     *
+     * @param row   the canonical sheet row supplying the metadata
+     * @param index the validated catalog index, also used as the original filename
+     * @param file  the PDF on disk, or empty when none was found
+     * @return the skip reason, or empty when the document was saved
+     */
+    private Optional<ImportStatus.SkipReason> persist(CanonicalSheetReader.Row row, String index, Optional<File> file) {
+        Document existing = documentService.findByOriginalFilename(index).orElse(null);
+        if (existing != null && existing.getStatus() != DocumentStatus.PLACEHOLDER) {
+            return Optional.of(ImportStatus.SkipReason.ALREADY_EXISTS);
+        }
+
+        String s3Key = null;
+        String contentType = null;
+        DocumentStatus status = DocumentStatus.PLACEHOLDER;
+        if (file.isPresent()) {
+            File pdf = file.get();
+            contentType = probeContentType(pdf);
+            // Random prefix keeps keys unique even when the same file name is imported again.
+            s3Key = "documents/" + UUID.randomUUID() + "_" + pdf.getName();
+            try {
+                // NOTE(review): the upload runs before the save below; if the save or the
+                // surrounding transaction fails afterwards, the uploaded object is presumably
+                // left behind — confirm whether cleanup exists elsewhere.
+                uploadToS3(pdf, s3Key, contentType);
+                status = DocumentStatus.UPLOADED;
+            } catch (Exception e) {
+                log.error("S3 upload failed for {}", pdf.getName(), e);
+                return Optional.of(ImportStatus.SkipReason.S3_UPLOAD_FAILED);
+            }
+        }
+
+        Document doc = buildDocument(row, index, existing, s3Key, contentType, status);
+        Document saved = documentService.save(doc);
+        if (file.isPresent()) {
+            thumbnailAsyncRunner.dispatchAfterCommit(saved.getId());
+        }
+        return Optional.empty();
+    }
+
+    /**
+     * Populates the document for one row, reusing {@code existing} when it is given.
+     *
+     * <p>The order matters: the file-metadata step reads the dates to build the title, and
+     * the computed flags read the sender, receivers and date set by the earlier steps.
+     *
+     * @return {@code existing}, or a newly built document whose original filename is {@code index}
+     */
+    private Document buildDocument(CanonicalSheetReader.Row row, String index, Document existing,
+                                   String s3Key, String contentType, DocumentStatus status) {
+        Document target;
+        if (existing == null) {
+            target = Document.builder().originalFilename(index).build();
+        } else {
+            target = existing;
+        }
+        applyAttribution(target, row);
+        applyDates(target, row);
+        applyAuthoritativeAssociations(target, row);
+        applyFileMetadata(target, s3Key, contentType, status, index);
+        applyComputedFlags(target);
+        return target;
+    }
+
+ // Sender + raw sender/receiver text. The raw cells are always retained verbatim, even
+ // when a person is linked — the load-bearing invariant behind the merge story (ADR-025).
+    /**
+     * Sets the linked sender and the raw sender/receiver text. Blank raw cells are stored
+     * as {@code null}.
+     */
+    private void applyAttribution(Document doc, CanonicalSheetReader.Row row) {
+        String rawSender = row.get("sender_name");
+        doc.setSender(resolveSender(row.get("sender_person_id"), rawSender));
+        doc.setSenderText(blankToNull(rawSender));
+        doc.setReceiverText(blankToNull(row.get("receiver_names")));
+    }
+
+ // Date triplet + raw + location. Pure value parsing, no semantic logic.
+    /**
+     * Copies the date columns, the location and the summary from the row onto the document.
+     * Unparseable or blank dates become {@code null}; an unrecognised precision becomes UNKNOWN.
+     */
+    private void applyDates(Document doc, CanonicalSheetReader.Row row) {
+        LocalDate anchor = parseIsoDate(row.get("date_iso"));
+        doc.setDocumentDate(anchor);
+        DatePrecision precision = parsePrecision(row.get("date_precision"));
+        doc.setMetaDatePrecision(precision);
+        LocalDate rangeEnd = parseIsoDate(row.get("date_end"));
+        doc.setMetaDateEnd(rangeEnd);
+        doc.setMetaDateRaw(blankToNull(row.get("date_raw")));
+        doc.setLocation(blankToNull(row.get("location")));
+        doc.setSummary(blankToNull(row.get("summary")));
+    }
+
+ // Receivers and tags are owned by the canonical row (ADR-025): clear then re-populate so a
+ // shrunk set on re-import prunes stale links rather than accumulating them. The
+ // "preserve human edits" rule does NOT extend to these collections.
+    /**
+     * Replaces the document's receivers and tags with exactly what the row specifies.
+     *
+     * <p>The receivers are resolved before the existing collection is cleared, so a failure
+     * while resolving leaves the current receivers untouched.
+     */
+    private void applyAuthoritativeAssociations(Document doc, CanonicalSheetReader.Row row) {
+        Set<Person> receivers = resolveReceivers(row.get("receiver_person_ids"), row.get("receiver_names"));
+        doc.getReceivers().clear();
+        doc.getReceivers().addAll(receivers);
+        attachTag(doc, row.get("tags"));
+    }
+
+ // S3 key, content type, status, and the index-derived title.
+    /**
+     * Stores the upload outcome and derives the title. The title is built from the index
+     * plus the date fields and location already present on {@code doc}, so the date step
+     * must have run first.
+     */
+    private void applyFileMetadata(Document doc, String s3Key, String contentType,
+                                   DocumentStatus status, String index) {
+        doc.setStatus(status);
+        doc.setFilePath(s3Key);
+        doc.setContentType(contentType);
+        String title = buildTitle(
+                index,
+                doc.getDocumentDate(),
+                doc.getMetaDatePrecision(),
+                doc.getMetaDateEnd(),
+                doc.getMetaDateRaw(),
+                doc.getLocation());
+        doc.setTitle(title);
+    }
+
+ // metadataComplete: a document counts as fully described if any of the three "who/when"
+ // pieces is filled. Called last so the upstream setters have already populated the doc.
+    /**
+     * Marks the metadata as complete when the document has a date, a sender, or at least one
+     * receiver. The checks run in that order and stop at the first hit, so the receivers
+     * collection is only consulted when both date and sender are missing.
+     */
+    private void applyComputedFlags(Document doc) {
+        boolean complete;
+        if (doc.getDocumentDate() != null) {
+            complete = true;
+        } else if (doc.getSender() != null) {
+            complete = true;
+        } else {
+            complete = !doc.getReceivers().isEmpty();
+        }
+        doc.setMetadataComplete(complete);
+    }
+
+ // The title carries the date at the HONEST precision (never a fabricated day) via the
+ // shared DocumentTitleFormatter, plus the location — kept under 20 lines by delegating.
+    /**
+     * Builds the title as {@code index[ – date][ – location]}.
+     *
+     * @param index     the catalog index; always the first segment
+     * @param date      anchor date; the date segment is omitted when this is {@code null}
+     * @param precision precision of {@code date}; UNKNOWN also suppresses the date segment
+     * @param end       range end, forwarded to the formatter
+     * @param raw       verbatim date cell, forwarded to the formatter
+     * @param location  appended when neither {@code null} nor blank
+     * @return the assembled title
+     */
+    private static String buildTitle(String index, LocalDate date, DatePrecision precision,
+                                     LocalDate end, String raw, String location) {
+        StringBuilder out = new StringBuilder(index);
+        boolean dated = !(date == null || precision == DatePrecision.UNKNOWN);
+        if (dated) {
+            String label = DocumentTitleFormatter.formatTitleDate(date, precision, end, raw);
+            out.append(" – ").append(label);
+        }
+        boolean located = !(location == null || location.isBlank());
+        if (located) {
+            out.append(" – ").append(location);
+        }
+        return out.toString();
+    }
+
+ // ─── attribution routing — register-first, always retain raw ─────────────────────
+
+    /**
+     * Resolves the sender for a row.
+     *
+     * @return {@code null} when the slug is blank, otherwise the person for that slug
+     */
+    private Person resolveSender(String slug, String rawName) {
+        return slug.isBlank() ? null : resolvePerson(slug, rawName);
+    }
+
+ // Zips the parallel `receiver_person_ids` and `receiver_names` columns by position so an
+ // unresolved receiver becomes a provisional Person whose lastName is the human name from
+ // `receiver_names`, not the slug. If the names list is shorter than the slugs list (rare —
+ // canonical data zips them 1:1), missing entries fall back to slug-as-name.
+    /**
+     * Resolves every receiver slug of a row to a person, pairing slugs and names by position.
+     *
+     * @param slugs the raw {@code receiver_person_ids} cell
+     * @param names the raw {@code receiver_names} cell
+     * @return the receivers in slug order; a slug without a matching name uses the slug as name
+     */
+    private Set<Person> resolveReceivers(String slugs, String names) {
+        List<String> slugList = CanonicalSheetReader.splitList(slugs);
+        List<String> nameList = CanonicalSheetReader.splitList(names);
+        // LinkedHashSet keeps the sheet's order.
+        Set<Person> receivers = new LinkedHashSet<>();
+        for (int i = 0; i < slugList.size(); i++) {
+            String slug = slugList.get(i);
+            String name = i < nameList.size() ? nameList.get(i) : slug;
+            receivers.add(resolvePerson(slug, name));
+        }
+        return receivers;
+    }
+
+    /**
+     * Returns the person registered under {@code slug}, creating a provisional one when
+     * none exists. The provisional person's last name is {@code rawName}, or the slug itself
+     * when {@code rawName} is {@code null} or blank.
+     */
+    private Person resolvePerson(String slug, String rawName) {
+        var registered = personService.findBySourceRef(slug);
+        if (registered.isPresent()) {
+            return registered.get();
+        }
+        String lastName = blankToNull(rawName) != null ? rawName : slug;
+        PersonUpsertCommand command = PersonUpsertCommand.builder()
+                .sourceRef(slug)
+                .lastName(lastName)
+                .personType(PersonType.PERSON)
+                .provisional(true)
+                .build();
+        return personService.upsertBySourceRef(command);
+    }
+
+ // Authoritative: the canonical row defines the document's tags exactly. Clearing first
+ // means a tag removed from the row is pruned on re-import (ADR-025).
+    /**
+     * Replaces the document's tags with the single tag named by the row, if any.
+     *
+     * <p>The existing tags are always cleared first. A {@code null} or blank path, or a path
+     * the tag service does not know, leaves the document without tags.
+     *
+     * @param doc     the document whose tags are rewritten
+     * @param tagPath the raw {@code tags} cell; may be {@code null} or blank
+     */
+    private void attachTag(Document doc, String tagPath) {
+        doc.getTags().clear();
+        // "tags" is not among the required headers, so guard against null the same way the
+        // other optional columns are guarded by parseIsoDate/blankToNull.
+        if (tagPath == null || tagPath.isBlank()) return;
+        tagService.findBySourceRef(tagPath).ifPresent(tag -> doc.getTags().add(tag));
+    }
+
+ // ─── clean-value parsing (no semantic logic) ─────────────────────────────────────
+
+    /**
+     * Parses an ISO-8601 local date ({@code yyyy-MM-dd}), ignoring surrounding spaces.
+     *
+     * @param value the raw cell; may be {@code null}
+     * @return the parsed date, or {@code null} when the value is missing, blank or malformed
+     */
+    private static LocalDate parseIsoDate(String value) {
+        if (value != null && !value.isBlank()) {
+            try {
+                return LocalDate.parse(value.trim());
+            } catch (DateTimeParseException ignored) {
+                // Malformed input is treated the same as an absent date.
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Maps the raw precision cell to a {@code DatePrecision} constant by exact
+     * (case-sensitive) name, ignoring surrounding spaces.
+     *
+     * @param value the raw cell; may be {@code null}
+     * @return the matching constant, or UNKNOWN when the value is missing, blank or unrecognised
+     */
+    private static DatePrecision parsePrecision(String value) {
+        if (value != null && !value.isBlank()) {
+            try {
+                return DatePrecision.valueOf(value.trim());
+            } catch (IllegalArgumentException ignored) {
+                // An unrecognised name falls through to UNKNOWN.
+            }
+        }
+        return DatePrecision.UNKNOWN;
+    }
+
+ // ─── file handling + S3 (small ≤20-line methods) ─────────────────────────────────
+
+    /**
+     * Asks the platform for the file's MIME type.
+     *
+     * @return the probed type, or {@code "application/octet-stream"} when probing yields
+     *         nothing or fails with an I/O error
+     */
+    private String probeContentType(File file) {
+        String detected = null;
+        try {
+            detected = Files.probeContentType(file.toPath());
+        } catch (IOException ignored) {
+            // Fall back to the generic type below.
+        }
+        return detected == null ? "application/octet-stream" : detected;
+    }
+
+    /**
+     * Uploads the file to the configured bucket under {@code s3Key} with the given content
+     * type. Any exception raised by the S3 client propagates to the caller.
+     */
+    private void uploadToS3(File file, String s3Key, String contentType) {
+        PutObjectRequest request = PutObjectRequest.builder()
+                .bucket(bucketName)
+                .key(s3Key)
+                .contentType(contentType)
+                .build();
+        s3Client.putObject(request, RequestBody.fromFile(file));
+    }
+
+ // ─── index validation + containment — defense-in-depth, do not weaken ────────────
+
+ // The index is the only thing that drives the on-disk lookup, so it must never contain a
+ // path separator, traversal token, slash homoglyph, null byte, or absolute-path marker —
+ // each guard mirrors the filename guards ported from MassImportService — and it must match
+ // the strict catalog shape so anything unexpected is skipped loudly rather than read.
+    /**
+     * Decides whether an index may be used to build the on-disk PDF path.
+     *
+     * <p>Separators, slash look-alikes, dots, NUL and absolute paths are rejected first;
+     * whatever remains must still match {@code INDEX_PATTERN} in full.
+     *
+     * @param index the raw index cell; may be {@code null}
+     * @return {@code true} only when the index passes every guard and the catalog pattern
+     */
+    private boolean isValidImportIndex(String index) {
+        if (index == null || index.isBlank()) return false;
+        if (index.contains("/")) return false;
+        if (index.contains("\\")) return false;
+        // The look-alikes are written as escapes so an editor or encoding round-trip cannot
+        // silently normalise them to an ASCII slash and turn the guard into a duplicate check.
+        if (index.contains("\u2215")) return false; // U+2215 DIVISION SLASH
+        if (index.contains("\uFF0F")) return false; // U+FF0F FULLWIDTH SOLIDUS
+        if (index.contains("\u29F5")) return false; // U+29F5 REVERSE SOLIDUS OPERATOR
+        if (index.contains(".")) return false; // no dots — "<index>.pdf" carries the only extension
+        if (index.contains("\0")) return false;
+        try {
+            if (Paths.get(index).isAbsolute()) return false;
+        } catch (java.nio.file.InvalidPathException e) {
+            // A string the platform cannot turn into a path is invalid, not fatal: reject the
+            // row instead of letting the exception abort the whole import.
+            return false;
+        }
+        return INDEX_PATTERN.matcher(index).matches();
+    }
+
+ private boolean isPdfMagicBytes(File file) throws IOException {
+ // FileStreamOpener is injected so tests can stub a throwing implementation for the
+ // IO-error branch without spying on the importer itself.
+ try (InputStream is = fileStreamOpener.open(file)) {
+ byte[] header = is.readNBytes(4);
+ return header.length == 4
+ && header[0] == 0x25 // %
+ && header[1] == 0x50 // P
+ && header[2] == 0x44 // D
+ && header[3] == 0x46; // F
+ }
+ }
+
+ // O(1) direct lookup: the PDF is exactly importDir/<index>.pdf. The caller has already
+ // validated the index shape; the canonical-path containment assertion below is
+ // defense-in-depth so even a symlinked <index>.pdf cannot read outside importDir.
+    /**
+     * Looks up {@code <index>.pdf} directly inside the import directory.
+     *
+     * @param index     the already validated catalog index
+     * @param rowNumber the 1-based spreadsheet row, used only for the warning log
+     * @return the file, or empty when it is not a regular file or its canonical path
+     *         cannot be resolved
+     * @throws DomainException when the canonical path of the file lies outside the import dir
+     */
+    private Optional<File> resolvePdfByIndex(String index, int rowNumber) {
+        File baseDir = new File(importDir);
+        File candidate = baseDir.toPath().resolve(index + ".pdf").toFile();
+        try {
+            if (!candidate.isFile()) return Optional.empty();
+            String baseDirCanonical = baseDir.getCanonicalPath();
+            // The trailing separator stops a sibling such as "/import-other" from passing
+            // the prefix test.
+            if (!candidate.getCanonicalPath().startsWith(baseDirCanonical + File.separator)) {
+                throw DomainException.internal(ErrorCode.INTERNAL_ERROR, "Path escape detected: " + candidate);
+            }
+            return Optional.of(candidate);
+        } catch (IOException e) {
+            // Distinct from the deliberate escape abort above (which throws): canonical
+            // resolution itself failed. We fail safe to a PLACEHOLDER, but never silently —
+            // log it so the asymmetry surfaces in ops.
+            log.warn("Canonical path resolution failed for import row {}: treating {}.pdf as absent",
+                    rowNumber, index, e);
+            return Optional.empty();
+        }
+    }
+
+    /**
+     * Normalises "no value" to {@code null}.
+     *
+     * @return {@code null} for a {@code null}, empty or whitespace-only string; otherwise the
+     *         input unchanged (not trimmed)
+     */
+    private static String blankToNull(String s) {
+        if (s != null && !s.isBlank()) {
+            return s;
+        }
+        return null;
+    }
+}
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentTitleFormatter.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentTitleFormatter.java
new file mode 100644
index 00000000..65120004
--- /dev/null
+++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentTitleFormatter.java
@@ -0,0 +1,112 @@
+package org.raddatz.familienarchiv.importing;
+
+import org.raddatz.familienarchiv.document.DatePrecision;
+
+import java.time.LocalDate;
+import java.time.format.DateTimeFormatter;
+import java.util.Locale;
+
+/**
+ * Produces the honest German date label baked into an import title — at exactly
+ * the precision the data claims, never finer. This is the Java half of the
+ * single source of truth shared with the frontend {@code formatDocumentDate}
+ * (TypeScript): both are asserted against {@code docs/date-label-fixtures.json}
+ * so the two implementations cannot drift (see #666).
+ *
+ * Import titles are always German, so the labels here are the German
+ * canonical form (mirroring the {@code de} Paraglide messages used by the UI).
+ */
+final class DocumentTitleFormatter {
+
+ private static final DateTimeFormatter LONG = DateTimeFormatter.ofPattern("d. MMMM yyyy", Locale.GERMAN);
+ private static final DateTimeFormatter MONTH_YEAR = DateTimeFormatter.ofPattern("MMMM yyyy", Locale.GERMAN);
+ private static final DateTimeFormatter MEDIUM = DateTimeFormatter.ofPattern("d. MMM yyyy", Locale.GERMAN);
+ private static final DateTimeFormatter DAY_MONTH = DateTimeFormatter.ofPattern("d. MMM", Locale.GERMAN);
+
+ private static final String UNKNOWN = "Datum unbekannt";
+ private static final String APPROX_PREFIX = "ca.";
+ private static final String OPEN_RANGE_PREFIX = "ab";
+
+ private DocumentTitleFormatter() {
+ }
+
+ /**
+ * @param date the sort/filter anchor day; null for UNKNOWN rows
+ * @param precision descriptive precision metadata
+ * @param end the RANGE end day; null means an open-ended range
+ * @param raw the verbatim spreadsheet cell, used only to pick a season word
+ * @return the honest German label
+ */
+ static String formatTitleDate(LocalDate date, DatePrecision precision, LocalDate end, String raw) {
+ if (precision == DatePrecision.UNKNOWN || date == null) {
+ return UNKNOWN;
+ }
+ return switch (precision) {
+ case DAY -> LONG.format(date);
+ case MONTH -> MONTH_YEAR.format(date);
+ case SEASON -> seasonLabel(date, raw);
+ case YEAR -> String.valueOf(date.getYear());
+ case APPROX -> APPROX_PREFIX + " " + date.getYear();
+ case RANGE -> rangeLabel(date, end);
+ case UNKNOWN -> UNKNOWN;
+ };
+ }
+
+ private static String seasonLabel(LocalDate date, String raw) {
+ Season season = seasonFromRaw(raw);
+ if (season == null) {
+ season = seasonOfMonth(date.getMonthValue());
+ }
+ return season.german + " " + date.getYear();
+ }
+
+ private static String rangeLabel(LocalDate start, LocalDate end) {
+ if (end == null) {
+ return OPEN_RANGE_PREFIX + " " + MEDIUM.format(start);
+ }
+ if (end.equals(start)) {
+ return MEDIUM.format(start);
+ }
+ if (start.getYear() != end.getYear()) {
+ return MEDIUM.format(start) + " – " + MEDIUM.format(end);
+ }
+ if (start.getMonthValue() == end.getMonthValue()) {
+ return start.getDayOfMonth() + ".–" + MEDIUM.format(end);
+ }
+ return DAY_MONTH.format(start) + " – " + MEDIUM.format(end);
+ }
+
+ // ─── season mapping — mirrors the normalizer's representative months ─────────────
+
+ private enum Season {
+ SPRING("Frühling"),
+ SUMMER("Sommer"),
+ AUTUMN("Herbst"),
+ WINTER("Winter");
+
+ private final String german;
+
+ Season(String german) {
+ this.german = german;
+ }
+ }
+
+ private static Season seasonOfMonth(int month) {
+ if (month >= 3 && month <= 5) return Season.SPRING;
+ if (month >= 6 && month <= 8) return Season.SUMMER;
+ if (month >= 9 && month <= 11) return Season.AUTUMN;
+ return Season.WINTER;
+ }
+
+ private static Season seasonFromRaw(String raw) {
+ if (raw == null || raw.isBlank()) return null;
+ String token = raw.trim().split("\\s+")[0].toLowerCase(Locale.GERMAN);
+ return switch (token) {
+ case "frühling", "frühjahr" -> Season.SPRING;
+ case "sommer" -> Season.SUMMER;
+ case "herbst" -> Season.AUTUMN;
+ case "winter" -> Season.WINTER;
+ default -> null;
+ };
+ }
+}
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/FileStreamOpener.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/FileStreamOpener.java
new file mode 100644
index 00000000..aa4c2e50
--- /dev/null
+++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/FileStreamOpener.java
@@ -0,0 +1,33 @@
+package org.raddatz.familienarchiv.importing;
+
+import org.springframework.stereotype.Component;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Test seam for opening a {@link File} as an {@link InputStream}. Extracted so the magic-byte
+ * check in {@link DocumentImporter} can be unit-tested for the IO-error branch by injecting a
+ * mock that throws, without needing a Mockito spy on the importer itself.
+ *
+ * <p>Production uses {@link DefaultFileStreamOpener}, a one-line delegate to
+ * {@code new FileInputStream(file)}.
+ */
+@FunctionalInterface
+public interface FileStreamOpener {
+
+ /** Opens {@code file} for sequential reads. Caller closes the returned stream. */
+ InputStream open(File file) throws IOException;
+
+ /** Default production implementation: plain {@code FileInputStream}. */
+ @Component
+ final class DefaultFileStreamOpener implements FileStreamOpener {
+
+ @Override
+ public InputStream open(File file) throws IOException {
+ return new FileInputStream(file);
+ }
+ }
+}
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/ImportStatus.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/ImportStatus.java
new file mode 100644
index 00000000..ae21adc2
--- /dev/null
+++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/ImportStatus.java
@@ -0,0 +1,50 @@
+package org.raddatz.familienarchiv.importing;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import io.swagger.v3.oas.annotations.media.Schema;
+
+import java.time.LocalDateTime;
+import java.util.List;
+
+/**
+ * Async import state surfaced to {@code admin/system/ImportStatusCard.svelte} via the
+ * generated types. The shape ({@code state, statusCode, processed, skippedFiles, skipped})
+ * is kept verbatim from the retired MassImportService so the admin UI keeps working.
+ */
+public record ImportStatus(
+        @Schema(requiredMode = Schema.RequiredMode.REQUIRED) State state,
+        @Schema(requiredMode = Schema.RequiredMode.REQUIRED) String statusCode,
+        // Excluded from the JSON payload by @JsonIgnore.
+        @JsonIgnore String message,
+        @Schema(requiredMode = Schema.RequiredMode.REQUIRED) int processed,
+        @Schema(requiredMode = Schema.RequiredMode.REQUIRED) List<SkippedFile> skippedFiles,
+        LocalDateTime startedAt
+) {
+
+    /** Lifecycle of an import run. */
+    public enum State { IDLE, RUNNING, DONE, FAILED }
+
+    /** Why a single row or file was not imported. */
+    public enum SkipReason {
+        INVALID_FILENAME_PATH_TRAVERSAL,
+        INVALID_PDF_SIGNATURE,
+        FILE_READ_ERROR,
+        ALREADY_EXISTS,
+        S3_UPLOAD_FAILED
+    }
+
+    /**
+     * One skipped entry.
+     *
+     * @param filename identifier of the skipped entry
+     * @param reason   why it was skipped
+     */
+    public record SkippedFile(
+            @Schema(requiredMode = Schema.RequiredMode.REQUIRED) String filename,
+            @Schema(requiredMode = Schema.RequiredMode.REQUIRED) SkipReason reason
+    ) {}
+
+    // Note: @Schema on a record accessor method is not picked up by SpringDoc; the
+    // "skipped" count is a computed convenience field derived from skippedFiles.size().
+    @JsonProperty("skipped")
+    public int skipped() {
+        return skippedFiles.size();
+    }
+
+    /**
+     * Defensive-copy constructor — callers cannot mutate the stored list after construction.
+     * {@link List#copyOf} rejects a {@code null} list and {@code null} elements.
+     */
+    public ImportStatus {
+        skippedFiles = List.copyOf(skippedFiles);
+    }
+}
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java
deleted file mode 100644
index 975517e7..00000000
--- a/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java
+++ /dev/null
@@ -1,509 +0,0 @@
-package org.raddatz.familienarchiv.importing;
-
-import com.fasterxml.jackson.annotation.JsonIgnore;
-import com.fasterxml.jackson.annotation.JsonProperty;
-import io.swagger.v3.oas.annotations.media.Schema;
-import lombok.RequiredArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import org.apache.poi.ss.usermodel.*;
-import java.util.Objects;
-import org.raddatz.familienarchiv.exception.DomainException;
-import org.raddatz.familienarchiv.exception.ErrorCode;
-import org.raddatz.familienarchiv.document.Document;
-import org.raddatz.familienarchiv.document.DocumentService;
-import org.raddatz.familienarchiv.document.DocumentStatus;
-import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
-import org.raddatz.familienarchiv.person.Person;
-import org.raddatz.familienarchiv.tag.Tag;
-import org.raddatz.familienarchiv.person.Person;
-import org.raddatz.familienarchiv.person.PersonNameParser;
-import org.raddatz.familienarchiv.person.PersonService;
-import org.raddatz.familienarchiv.tag.TagService;
-import org.springframework.beans.factory.annotation.Value;
-import org.springframework.scheduling.annotation.Async;
-import org.springframework.stereotype.Service;
-import org.springframework.transaction.annotation.Transactional;
-import org.w3c.dom.Element;
-import org.w3c.dom.NodeList;
-import software.amazon.awssdk.core.sync.RequestBody;
-import software.amazon.awssdk.services.s3.S3Client;
-import software.amazon.awssdk.services.s3.model.PutObjectRequest;
-
-import javax.xml.parsers.DocumentBuilderFactory;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.time.LocalDate;
-import java.time.LocalDateTime;
-import java.time.format.DateTimeFormatter;
-import java.time.format.DateTimeParseException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Locale;
-import java.util.Optional;
-import java.util.UUID;
-import java.util.stream.Stream;
-import java.util.zip.ZipFile;
-
-@Service
-@RequiredArgsConstructor
-@Slf4j
-public class MassImportService {
-
- public enum State { IDLE, RUNNING, DONE, FAILED }
-
- public enum SkipReason {
- INVALID_FILENAME_PATH_TRAVERSAL,
- INVALID_PDF_SIGNATURE,
- FILE_READ_ERROR,
- ALREADY_EXISTS,
- S3_UPLOAD_FAILED
- }
-
- public record SkippedFile(
- @Schema(requiredMode = Schema.RequiredMode.REQUIRED) String filename,
- @Schema(requiredMode = Schema.RequiredMode.REQUIRED) SkipReason reason
- ) {}
-
- public record ImportStatus(
- @Schema(requiredMode = Schema.RequiredMode.REQUIRED) State state,
- @Schema(requiredMode = Schema.RequiredMode.REQUIRED) String statusCode,
- @JsonIgnore String message,
- @Schema(requiredMode = Schema.RequiredMode.REQUIRED) int processed,
- @Schema(requiredMode = Schema.RequiredMode.REQUIRED) List skippedFiles,
- LocalDateTime startedAt
- ) {
- // Note: @Schema on a record accessor method is not picked up by SpringDoc; the
- // "skipped" count is a computed convenience field derived from skippedFiles.size().
- @JsonProperty("skipped")
- public int skipped() { return skippedFiles.size(); }
-
- /** Defensive-copy constructor — callers cannot mutate the stored list after construction. */
- public ImportStatus {
- skippedFiles = List.copyOf(skippedFiles);
- }
- }
-
- record ProcessResult(int processed, List skippedFiles) {}
-
- private volatile ImportStatus currentStatus = new ImportStatus(State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null);
-
- public ImportStatus getStatus() {
- return currentStatus;
- }
-
- private final DocumentService documentService;
- private final PersonService personService;
- private final TagService tagService;
- private final S3Client s3Client;
- private final ThumbnailAsyncRunner thumbnailAsyncRunner;
-
- @Value("${app.s3.bucket}")
- private String bucketName;
-
- @Value("${app.import.col.index:0}")
- private int colIndex;
-
- @Value("${app.import.col.box:1}")
- private int colBox;
-
- @Value("${app.import.col.folder:2}")
- private int colFolder;
-
- @Value("${app.import.col.sender:3}")
- private int colSender;
-
- @Value("${app.import.col.receivers:5}")
- private int colReceivers;
-
- @Value("${app.import.col.date:7}")
- private int colDate;
-
- @Value("${app.import.col.location:9}")
- private int colLocation;
-
- @Value("${app.import.col.tags:10}")
- private int colTags;
-
- @Value("${app.import.col.summary:11}")
- private int colSummary;
-
- @Value("${app.import.col.transcription:13}")
- private int colTranscription;
-
- @Value("${app.import.dir:/import}")
- private String importDir;
-
- private static final DateTimeFormatter GERMAN_DATE = DateTimeFormatter.ofPattern("d. MMMM yyyy", Locale.GERMAN);
-
- // ODS XML namespaces
- private static final String NS_TABLE = "urn:oasis:names:tc:opendocument:xmlns:table:1.0";
- private static final String NS_TEXT = "urn:oasis:names:tc:opendocument:xmlns:text:1.0";
-
- // We only need up to this many columns; caps repeated-empty-cell expansion
- private static final int MAX_COLS = 20;
-
- @Async
- public void runImportAsync() {
- if (currentStatus.state() == State.RUNNING) {
- throw DomainException.conflict(ErrorCode.IMPORT_ALREADY_RUNNING, "A mass import is already in progress");
- }
- currentStatus = new ImportStatus(State.RUNNING, "IMPORT_RUNNING", "Import läuft...", 0, List.of(), LocalDateTime.now());
- try {
- File spreadsheet = findSpreadsheetFile();
- log.info("Starte Massenimport aus: {}", spreadsheet.getAbsolutePath());
- ProcessResult result = processRows(readSpreadsheet(spreadsheet));
- currentStatus = new ImportStatus(State.DONE, "IMPORT_DONE",
- "Import abgeschlossen. " + result.processed() + " Dokumente verarbeitet.",
- result.processed(), result.skippedFiles(), currentStatus.startedAt());
- } catch (NoSpreadsheetException e) {
- log.error("Massenimport fehlgeschlagen: keine Tabellendatei", e);
- currentStatus = new ImportStatus(State.FAILED, "IMPORT_FAILED_NO_SPREADSHEET",
- "Fehler: " + e.getMessage(), 0, List.of(), currentStatus.startedAt());
- } catch (Exception e) {
- log.error("Massenimport fehlgeschlagen", e);
- currentStatus = new ImportStatus(State.FAILED, "IMPORT_FAILED_INTERNAL",
- "Fehler: " + e.getMessage(), 0, List.of(), currentStatus.startedAt());
- }
- }
-
- private static class NoSpreadsheetException extends RuntimeException {
- NoSpreadsheetException(String message) { super(message); }
- }
-
- private File findSpreadsheetFile() throws IOException {
- try (Stream files = Files.list(Paths.get(importDir))) {
- return files
- .filter(p -> {
- String name = p.toString().toLowerCase();
- return name.endsWith(".ods") || name.endsWith(".xlsx") || name.endsWith(".xls");
- })
- .findFirst()
- .orElseThrow(() -> new NoSpreadsheetException(
- "Keine Tabellendatei (.ods/.xlsx/.xls) in " + importDir + " gefunden!"))
- .toFile();
- }
- }
-
- // --- Spreadsheet reading (format-specific, produces neutral List>) ---
-
- private List> readSpreadsheet(File file) throws Exception {
- String name = file.getName().toLowerCase();
- if (name.endsWith(".ods")) {
- return readOds(file);
- }
- return readXlsx(file);
- }
-
- /**
- * Reads an ODS file by parsing its content.xml directly (no extra library needed).
- * ODS is a ZIP archive; content.xml holds the spreadsheet data as XML.
- */
- List> readOds(File file) throws Exception {
- List> result = new ArrayList<>();
-
- try (ZipFile zip = new ZipFile(file)) {
- var entry = zip.getEntry("content.xml");
- if (entry == null) throw new RuntimeException("Ungültige ODS-Datei: content.xml fehlt");
-
- var factory = XxeSafeXmlParser.hardenedFactory();
- factory.setNamespaceAware(true);
- var builder = factory.newDocumentBuilder();
- var doc = builder.parse(zip.getInputStream(entry));
-
- NodeList tables = doc.getElementsByTagNameNS(NS_TABLE, "table");
- if (tables.getLength() == 0) return result;
-
- var table = (Element) tables.item(0);
- NodeList rows = table.getElementsByTagNameNS(NS_TABLE, "table-row");
-
- for (int i = 0; i < rows.getLength(); i++) {
- var row = (Element) rows.item(i);
- List rowData = new ArrayList<>();
- NodeList cells = row.getElementsByTagNameNS(NS_TABLE, "table-cell");
-
- for (int j = 0; j < cells.getLength() && rowData.size() < MAX_COLS; j++) {
- var cell = (Element) cells.item(j);
-
- // Read the display text (first )
- String value = "";
- NodeList textNodes = cell.getElementsByTagNameNS(NS_TEXT, "p");
- if (textNodes.getLength() > 0) {
- value = textNodes.item(0).getTextContent().trim();
- }
-
- // Expand number-columns-repeated (capped at MAX_COLS)
- String repeatAttr = cell.getAttributeNS(NS_TABLE, "number-columns-repeated");
- int repeat = repeatAttr.isEmpty() ? 1 : Integer.parseInt(repeatAttr);
- repeat = Math.min(repeat, MAX_COLS - rowData.size());
-
- for (int r = 0; r < repeat; r++) {
- rowData.add(value);
- }
- }
- result.add(rowData);
- }
- }
- return result;
- }
-
- /** Reads an XLSX/XLS file using Apache POI. Converts all cells to strings. */
- private List> readXlsx(File file) throws Exception {
- List> result = new ArrayList<>();
- try (FileInputStream fis = new FileInputStream(file);
- Workbook workbook = WorkbookFactory.create(fis)) {
-
- Sheet sheet = workbook.getSheetAt(0);
- for (int i = 0; i <= sheet.getLastRowNum(); i++) {
- Row row = sheet.getRow(i);
- List rowData = new ArrayList<>();
- if (row != null) {
- for (int j = 0; j < MAX_COLS; j++) {
- rowData.add(xlsxCellToString(row.getCell(j)));
- }
- }
- result.add(rowData);
- }
- }
- return result;
- }
-
- private String xlsxCellToString(Cell cell) {
- if (cell == null) return "";
- return switch (cell.getCellType()) {
- case STRING -> cell.getStringCellValue();
- case NUMERIC -> {
- if (DateUtil.isCellDateFormatted(cell)) {
- yield cell.getLocalDateTimeCellValue().toLocalDate().toString(); // ISO
- }
- yield String.valueOf((int) cell.getNumericCellValue());
- }
- case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
- default -> "";
- };
- }
-
- // --- Import logic (works on neutral List rows) ---
-
- private ProcessResult processRows(List> rows) {
- int processed = 0;
- List skippedFiles = new ArrayList<>();
-
- for (int i = 1; i < rows.size(); i++) { // skip header row
- List cells = rows.get(i);
- String index = getCell(cells, colIndex);
- if (index.isBlank()) continue;
-
- String filename = index.contains(".") ? index : index + ".pdf";
- if (!isValidImportFilename(filename)) {
- log.warn("Skipping import row {}: filename rejected — {}", i, filename);
- skippedFiles.add(new SkippedFile(filename, SkipReason.INVALID_FILENAME_PATH_TRAVERSAL));
- continue;
- }
- Optional fileOnDisk = findFileRecursive(filename);
- if (fileOnDisk.isEmpty()) {
- log.warn("Datei nicht gefunden, importiere nur Metadaten: {}", filename);
- }
-
- if (fileOnDisk.isPresent()) {
- try {
- if (!isPdfMagicBytes(fileOnDisk.get())) {
- log.warn("Überspringe {}: Datei beginnt nicht mit %PDF-Signatur", filename);
- skippedFiles.add(new SkippedFile(filename, SkipReason.INVALID_PDF_SIGNATURE));
- continue;
- }
- } catch (IOException e) {
- log.error("Fehler beim Prüfen der Magic-Bytes für {}", filename, e);
- skippedFiles.add(new SkippedFile(filename, SkipReason.FILE_READ_ERROR));
- continue;
- }
- }
-
- Optional skipReason = importSingleDocument(cells, fileOnDisk, filename, index);
- if (skipReason.isPresent()) {
- skippedFiles.add(new SkippedFile(filename, skipReason.get()));
- } else {
- processed++;
- }
- }
- return new ProcessResult(processed, skippedFiles);
- }
-
- private boolean isValidImportFilename(String filename) {
- if (filename == null || filename.isBlank()) return false;
- if (filename.contains("/")) return false;
- if (filename.contains("\\")) return false;
- if (filename.contains("∕")) return false; // U+2215 DIVISION SLASH
- if (filename.contains("/")) return false; // U+FF0F FULLWIDTH SOLIDUS
- if (filename.contains("⧵")) return false; // U+29F5 REVERSE SOLIDUS OPERATOR
- if (filename.contains("..")) return false;
- if (filename.equals(".")) return false;
- if (filename.contains("\0")) return false;
- // Paths.get() is safe here on Linux for all inputs that passed the checks above;
- // it may throw InvalidPathException for OS-specific illegal chars on Windows,
- // but those are not reachable in production.
- if (Paths.get(filename).isAbsolute()) return false;
- return true;
- }
-
- // package-private: Mockito spy in tests can override to inject IOException
- InputStream openFileStream(File file) throws IOException {
- return new FileInputStream(file);
- }
-
- private boolean isPdfMagicBytes(File file) throws IOException {
- try (InputStream is = openFileStream(file)) {
- byte[] header = is.readNBytes(4);
- return header.length == 4
- && header[0] == 0x25 // %
- && header[1] == 0x50 // P
- && header[2] == 0x44 // D
- && header[3] == 0x46; // F
- }
- }
-
- /**
- * Imports a single document row.
- *
- * @return empty Optional on success; an Optional containing the skip reason on failure/skip.
- */
- @Transactional
- protected Optional importSingleDocument(List cells, Optional file, String originalFilename, String index) {
- Optional existing = documentService.findByOriginalFilename(originalFilename);
- if (existing.isPresent() && existing.get().getStatus() != DocumentStatus.PLACEHOLDER) {
- log.info("Dokument {} existiert bereits, überspringe.", originalFilename);
- return Optional.of(SkipReason.ALREADY_EXISTS);
- }
-
- String archiveBox = getCell(cells, colBox);
- String archiveFolder = getCell(cells, colFolder);
- String senderRaw = getCell(cells, colSender);
- String receiversRaw = getCell(cells, colReceivers);
- LocalDate date = parseDate(getCell(cells, colDate));
- String location = getCell(cells, colLocation);
- String tagRaw = getCell(cells, colTags);
- String summary = getCell(cells, colSummary);
- String transcription = getCell(cells, colTranscription);
-
- String s3Key = null;
- String contentType = null;
- DocumentStatus status = DocumentStatus.PLACEHOLDER;
-
- if (file.isPresent()) {
- try {
- contentType = Files.probeContentType(file.get().toPath());
- } catch (IOException e) {
- contentType = null;
- }
- if (contentType == null) contentType = "application/octet-stream";
-
- s3Key = "documents/" + UUID.randomUUID() + "_" + file.get().getName();
- try {
- s3Client.putObject(PutObjectRequest.builder()
- .bucket(bucketName)
- .key(s3Key)
- .contentType(contentType)
- .build(),
- RequestBody.fromFile(file.get()));
- status = DocumentStatus.UPLOADED;
- } catch (Exception e) {
- log.error("S3 Upload Fehler für {}", file.get().getName(), e);
- return Optional.of(SkipReason.S3_UPLOAD_FAILED);
- }
- }
-
- Person sender = senderRaw.isBlank() ? null : findOrCreatePerson(senderRaw);
- List receivers = PersonNameParser.parseReceivers(receiversRaw).stream()
- .map(this::findOrCreatePerson)
- .filter(Objects::nonNull)
- .toList();
-
- Tag tag = null;
- if (!tagRaw.isBlank()) {
- tag = tagService.findOrCreate(tagRaw);
- }
-
- Document doc = existing.orElse(Document.builder()
- .originalFilename(originalFilename)
- .build());
-
- // Heuristic: mark as complete if at least one key field is present in the spreadsheet row
- boolean metadataComplete = date != null || !senderRaw.isBlank() || !receiversRaw.isBlank();
-
- doc.setTitle(buildTitle(index, date, location));
- doc.setFilePath(s3Key);
- doc.setContentType(contentType);
- doc.setStatus(status);
- doc.setArchiveBox(archiveBox.isBlank() ? null : archiveBox);
- doc.setArchiveFolder(archiveFolder.isBlank() ? null : archiveFolder);
- doc.setDocumentDate(date);
- doc.setLocation(location.isBlank() ? null : location);
- doc.setSummary(summary.isBlank() ? null : summary);
- doc.setTranscription(transcription.isBlank() ? null : transcription);
- doc.setSender(sender);
- doc.getReceivers().addAll(receivers);
- if (tag != null) doc.getTags().add(tag);
- doc.setMetadataComplete(metadataComplete);
-
- Document saved = documentService.save(doc);
- if (file.isPresent()) {
- thumbnailAsyncRunner.dispatchAfterCommit(saved.getId());
- }
- log.info("Importiert{}: {}", file.isEmpty() ? " (nur Metadaten)" : "", originalFilename);
- return Optional.empty();
- }
-
- // --- Helpers ---
-
- private String getCell(List cells, int col) {
- if (col >= cells.size()) return "";
- String val = cells.get(col);
- return val == null ? "" : val.trim();
- }
-
- private LocalDate parseDate(String value) {
- if (value == null || value.isBlank()) return null;
- try {
- return LocalDate.parse(value.trim());
- } catch (DateTimeParseException e) {
- return null;
- }
- }
-
- private String buildTitle(String index, LocalDate date, String location) {
- StringBuilder sb = new StringBuilder(index);
- if (date != null) {
- sb.append(" \u2013 ").append(date.format(GERMAN_DATE));
- }
- if (location != null && !location.isBlank()) {
- sb.append(" \u2013 ").append(location);
- }
- return sb.toString();
- }
-
- private Person findOrCreatePerson(String rawName) {
- return personService.findOrCreateByAlias(rawName);
- }
-
- private Optional findFileRecursive(String filename) {
- File baseDir = new File(importDir);
- try (Stream walk = Files.walk(baseDir.toPath())) {
- Optional match = walk.filter(p -> !Files.isDirectory(p))
- .filter(p -> p.getFileName().toString().equals(filename))
- .findFirst();
- if (match.isEmpty()) return Optional.empty();
- File candidate = match.get().toFile();
- String baseDirCanonical = baseDir.getCanonicalPath();
- if (!candidate.getCanonicalPath().startsWith(baseDirCanonical + File.separator)) {
- throw DomainException.internal(ErrorCode.INTERNAL_ERROR, "Path escape detected: " + candidate);
- }
- return Optional.of(candidate);
- } catch (IOException e) {
- return Optional.empty();
- }
- }
-}
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/PersonRegisterImporter.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/PersonRegisterImporter.java
new file mode 100644
index 00000000..edad55d2
--- /dev/null
+++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/PersonRegisterImporter.java
@@ -0,0 +1,79 @@
+package org.raddatz.familienarchiv.importing;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.raddatz.familienarchiv.person.PersonService;
+import org.raddatz.familienarchiv.person.PersonType;
+import org.raddatz.familienarchiv.person.PersonUpsertCommand;
+import org.springframework.stereotype.Component;
+
+import java.io.File;
+import java.time.LocalDate;
+import java.time.format.DateTimeParseException;
+import java.util.List;
+
+/**
+ * Loads {@code canonical-persons.xlsx} (the register) into the person domain via
+ * {@link PersonService}, upserting each person by the normalizer {@code person_id}
+ * (source_ref). Register persons are confident identities, so {@code provisional} is
+ * driven by the sheet's already-clean value (normally {@code False}).
+ */
+@Component
+@RequiredArgsConstructor
+@Slf4j
+public class PersonRegisterImporter {
+
+    /** Header names handed to {@code CanonicalSheetReader.readRows} when reading the register sheet. */
+    static final List<String> REQUIRED_HEADERS = List.of("person_id", "last_name", "first_name", "provisional");
+
+    private final PersonService personService;
+
+    /**
+     * Upserts one person per sheet row; rows with a blank {@code person_id} are skipped.
+     *
+     * @param artifact the register spreadsheet
+     * @return the number of rows that were upserted
+     */
+    public int load(File artifact) {
+        List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(artifact, REQUIRED_HEADERS);
+        int processed = 0;
+        for (CanonicalSheetReader.Row row : rows) {
+            String personId = row.get("person_id");
+            if (personId.isBlank()) continue;
+            personService.upsertBySourceRef(toCommand(row, personId));
+            processed++;
+        }
+        log.info("Imported {} register persons from {}", processed, artifact.getName());
+        return processed;
+    }
+
+    /** Maps one sheet row to an upsert command; blank text cells become {@code null}. */
+    private PersonUpsertCommand toCommand(CanonicalSheetReader.Row row, String personId) {
+        return PersonUpsertCommand.builder()
+                .sourceRef(personId)
+                .lastName(blankToNull(row.get("last_name")))
+                .firstName(blankToNull(row.get("first_name")))
+                .maidenName(blankToNull(row.get("maiden_name")))
+                .notes(blankToNull(row.get("notes")))
+                .birthYear(yearOf(row.get("birth_date")))
+                .deathYear(yearOf(row.get("death_date")))
+                .personType(PersonType.PERSON)
+                .provisional(Boolean.parseBoolean(row.get("provisional")))
+                .build();
+    }
+
+    /** Returns the year of an ISO-8601 date string, or {@code null} when it is blank or unparseable. */
+    private static Integer yearOf(String isoDate) {
+        if (isoDate == null || isoDate.isBlank()) return null;
+        try {
+            return LocalDate.parse(isoDate.trim()).getYear();
+        } catch (DateTimeParseException e) {
+            return null;
+        }
+    }
+
+    /** Collapses {@code null} and blank strings to {@code null}. */
+    private static String blankToNull(String s) {
+        return (s == null || s.isBlank()) ? null : s;
+    }
+}
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/PersonTreeImporter.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/PersonTreeImporter.java
new file mode 100644
index 00000000..26ae0dcd
--- /dev/null
+++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/PersonTreeImporter.java
@@ -0,0 +1,168 @@
+package org.raddatz.familienarchiv.importing;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.raddatz.familienarchiv.exception.DomainException;
+import org.raddatz.familienarchiv.exception.ErrorCode;
+import org.raddatz.familienarchiv.person.Person;
+import org.raddatz.familienarchiv.person.PersonService;
+import org.raddatz.familienarchiv.person.PersonType;
+import org.raddatz.familienarchiv.person.PersonUpsertCommand;
+import org.raddatz.familienarchiv.person.relationship.RelationType;
+import org.raddatz.familienarchiv.person.relationship.RelationshipService;
+import org.raddatz.familienarchiv.person.relationship.dto.CreateRelationshipRequest;
+import org.springframework.stereotype.Component;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.UUID;
+
+/**
+ * Loads {@code canonical-persons-tree.json} into the person + relationship domains.
+ * Tree persons are upserted via {@link PersonService} keyed on the shared
+ * {@code personId} slug (which Phase 1 #670 now emits into the tree), so they reconcile
+ * with the register rather than duplicating it. Relationships reference persons by the
+ * tree's local {@code rowId}; each side is mapped to the upserted person's UUID and
+ * created through {@link RelationshipService} (never the relationship repository —
+ * layering rule). A duplicate or circular relationship on re-import is skipped for
+ * idempotency, and a relationship whose type is not a known {@link RelationType} is skipped too.
+ */
+@Component
+@RequiredArgsConstructor
+@Slf4j
+public class PersonTreeImporter {
+
+    // The tree JSON is a local implementation detail, not a shared API payload, so the
+    // importer owns its own mapper rather than depending on the web ObjectMapper bean.
+    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+    private final PersonService personService;
+    private final RelationshipService relationshipService;
+
+    /**
+     * Imports the persons and relationships of the tree artifact.
+     *
+     * @param artifact the canonical tree JSON file
+     * @return the number of distinct {@code rowId}s that were mapped to an upserted person
+     * @throws DomainException with {@code IMPORT_ARTIFACT_INVALID} when the file cannot be parsed
+     */
+    public int load(File artifact) {
+        JsonNode root = readTree(artifact);
+        Map<String, UUID> idByRowId = upsertPersons(root.path("persons"));
+        int relationships = createRelationships(root.path("relationships"), idByRowId);
+        log.info("Imported {} tree persons and {} relationships from {}",
+                idByRowId.size(), relationships, artifact.getName());
+        return idByRowId.size();
+    }
+
+    /** Parses the artifact as JSON; any failure is logged and rethrown as a bad-request {@link DomainException}. */
+    private JsonNode readTree(File artifact) {
+        try {
+            return OBJECT_MAPPER.readTree(artifact);
+        } catch (Exception e) {
+            // The DomainException below carries only a message, so log the cause before it is lost.
+            log.warn("Could not parse canonical artifact {}", artifact.getName(), e);
+            throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
+                    "Unreadable canonical artifact: " + artifact.getName());
+        }
+    }
+
+    /** Upserts every person node that has a non-blank {@code personId} and maps its {@code rowId} to the person's id. */
+    private Map<String, UUID> upsertPersons(JsonNode persons) {
+        Map<String, UUID> idByRowId = new HashMap<>();
+        for (JsonNode node : persons) {
+            String personId = text(node, "personId");
+            if (personId.isBlank()) continue;
+            Person person = personService.upsertBySourceRef(toCommand(node, personId));
+            idByRowId.put(text(node, "rowId"), person.getId());
+        }
+        return idByRowId;
+    }
+
+    /** Maps one tree person node to an upsert command; tree persons are never provisional. */
+    private PersonUpsertCommand toCommand(JsonNode node, String personId) {
+        return PersonUpsertCommand.builder()
+                .sourceRef(personId)
+                .lastName(blankToNull(text(node, "lastName")))
+                .firstName(blankToNull(text(node, "firstName")))
+                .maidenName(blankToNull(text(node, "maidenName")))
+                .notes(blankToNull(text(node, "notes")))
+                .birthYear(intOrNull(node, "birthYear"))
+                .deathYear(intOrNull(node, "deathYear"))
+                .familyMember(node.path("familyMember").asBoolean(false))
+                .personType(PersonType.PERSON)
+                .provisional(false)
+                .build();
+    }
+
+    /** Creates the tree's relationships and returns how many were newly added; unresolvable or unknown-typed ones are skipped. */
+    private int createRelationships(JsonNode relationships, Map<String, UUID> idByRowId) {
+        int created = 0;
+        for (JsonNode node : relationships) {
+            // Trap: a relationship node's personId / relatedPersonId fields carry the tree's
+            // local rowId (e.g. "row_a"), NOT a person slug. They are resolved through
+            // idByRowId to the upserted person's UUID.
+            UUID person = idByRowId.get(text(node, "personId"));
+            UUID related = idByRowId.get(text(node, "relatedPersonId"));
+            if (person == null || related == null) {
+                log.warn("Skipping tree relationship with unresolved rowId: {} -> {}",
+                        text(node, "personId"), text(node, "relatedPersonId"));
+                continue;
+            }
+            RelationType type = parseRelationType(text(node, "type"));
+            if (type == null) {
+                log.warn("Skipping tree relationship with unknown type '{}': {} -> {}",
+                        text(node, "type"), text(node, "personId"), text(node, "relatedPersonId"));
+                continue;
+            }
+            if (addRelationshipIdempotently(person, related, type)) {
+                created++;
+            }
+        }
+        return created;
+    }
+
+    /** Returns the {@link RelationType} named {@code raw}, or {@code null} when there is no such constant. */
+    private static RelationType parseRelationType(String raw) {
+        try {
+            return RelationType.valueOf(raw);
+        } catch (IllegalArgumentException ignored) {
+            return null;
+        }
+    }
+
+    /** Adds one relationship; returns {@code false} when the service reports it as duplicate or circular. */
+    private boolean addRelationshipIdempotently(UUID person, UUID related, RelationType type) {
+        try {
+            relationshipService.addRelationship(person,
+                    new CreateRelationshipRequest(related, type, null, null, null));
+            return true;
+        } catch (DomainException e) {
+            if (e.getCode() == ErrorCode.DUPLICATE_RELATIONSHIP
+                    || e.getCode() == ErrorCode.CIRCULAR_RELATIONSHIP) {
+                return false;
+            }
+            throw e;
+        }
+    }
+
+    /** Returns the field's text, or an empty string when the field is absent or JSON null. */
+    private static String text(JsonNode node, String field) {
+        JsonNode value = node.get(field);
+        return value == null || value.isNull() ? "" : value.asText();
+    }
+
+    /** Returns the field as an int, or {@code null} when the field is absent or JSON null. */
+    private static Integer intOrNull(JsonNode node, String field) {
+        JsonNode value = node.get(field);
+        return value == null || value.isNull() ? null : value.asInt();
+    }
+
+    /** Collapses {@code null} and blank strings to {@code null}. */
+    private static String blankToNull(String s) {
+        return (s == null || s.isBlank()) ? null : s;
+    }
+}
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/TagTreeImporter.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/TagTreeImporter.java
new file mode 100644
index 00000000..a871ab32
--- /dev/null
+++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/TagTreeImporter.java
@@ -0,0 +1,69 @@
+package org.raddatz.familienarchiv.importing;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.raddatz.familienarchiv.tag.Tag;
+import org.raddatz.familienarchiv.tag.TagService;
+import org.springframework.stereotype.Component;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+
+/**
+ * Loads {@code canonical-tag-tree.xlsx} into the tag domain via {@link TagService},
+ * upserting each tag by its canonical {@code tag_path} (the source_ref). Parent links are
+ * resolved by the parent's path, which is the child path with its last {@code /segment}
+ * stripped. Rows are emitted parents-first by the normalizer, so a parent is always
+ * resolved before any child references it.
+ */
+@Component
+@RequiredArgsConstructor
+@Slf4j
+public class TagTreeImporter {
+
+    /** Header names handed to {@code CanonicalSheetReader.readRows} when reading the tag sheet. */
+    static final List<String> REQUIRED_HEADERS = List.of("tag_path", "parent_name", "tag_name");
+    private static final String PATH_SEPARATOR = "/";
+
+    private final TagService tagService;
+
+    /**
+     * Upserts one tag per sheet row, in sheet order; rows with a blank {@code tag_path} are skipped.
+     *
+     * @param artifact the tag-tree spreadsheet
+     * @return the number of rows that were upserted
+     */
+    public int load(File artifact) {
+        List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(artifact, REQUIRED_HEADERS);
+        Map<String, UUID> idByPath = new HashMap<>();
+        int processed = 0;
+        for (CanonicalSheetReader.Row row : rows) {
+            String path = row.get("tag_path");
+            if (path.isBlank()) continue;
+            UUID parentId = resolveParentId(path, idByPath);
+            Tag tag = tagService.upsertBySourceRef(path, row.get("tag_name"), parentId);
+            idByPath.put(path, tag.getId());
+            processed++;
+        }
+        log.info("Imported {} tags from {}", processed, artifact.getName());
+        return processed;
+    }
+
+    /**
+     * Looks up the id of the tag whose path is {@code path} minus its last segment.
+     * Returns {@code null} for a root path, or (with a warning) when the parent was not imported earlier.
+     */
+    private UUID resolveParentId(String path, Map<String, UUID> idByPath) {
+        int lastSeparator = path.lastIndexOf(PATH_SEPARATOR);
+        if (lastSeparator < 0) return null;
+        String parentPath = path.substring(0, lastSeparator);
+        UUID parentId = idByPath.get(parentPath);
+        if (parentId == null) {
+            log.warn("No imported tag found for parent path '{}' of '{}'; parent id left unset", parentPath, path);
+        }
+        return parentId;
+    }
+}
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/XxeSafeXmlParser.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/XxeSafeXmlParser.java
deleted file mode 100644
index 949ea054..00000000
--- a/backend/src/main/java/org/raddatz/familienarchiv/importing/XxeSafeXmlParser.java
+++ /dev/null
@@ -1,20 +0,0 @@
-package org.raddatz.familienarchiv.importing;
-
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-
-class XxeSafeXmlParser {
-
- private XxeSafeXmlParser() {}
-
- static DocumentBuilderFactory hardenedFactory() throws ParserConfigurationException {
- var factory = DocumentBuilderFactory.newInstance();
- factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
- factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
- factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
- factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
- factory.setXIncludeAware(false);
- factory.setExpandEntityReferences(false);
- return factory;
- }
-}
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/person/Person.java b/backend/src/main/java/org/raddatz/familienarchiv/person/Person.java
index d2332519..993480c4 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/person/Person.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/person/Person.java
@@ -57,6 +57,18 @@ public class Person {
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
private boolean familyMember = false;
+ // The normalizer person_id — join key and re-import idempotency key. Null for manually
+ // created persons; unique among non-null values (see ADR-025).
+ @Column(name = "source_ref")
+ private String sourceRef;
+
+ // A provisional person is one the importer inferred but could not confidently identify.
+ // Distinct from familyMember (a genealogical fact); set true only by the importer (Phase 3).
+ @Column(name = "provisional", nullable = false)
+ @Builder.Default
+ @Schema(requiredMode = Schema.RequiredMode.REQUIRED)
+ private boolean provisional = false;
+
// Entity-graph navigation for JPA JOIN queries (e.g. DocumentSpecifications.hasText).
// Uses entity relationship rather than cross-domain repository access, avoiding a
// separate DB roundtrip while respecting domain boundaries.
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonController.java b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonController.java
index 5c47cbde..dad52b5a 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonController.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonController.java
@@ -22,12 +22,15 @@ import org.springframework.web.bind.annotation.*;
import org.springframework.web.server.ResponseStatusException;
import jakarta.validation.Valid;
+import jakarta.validation.constraints.Max;
+import jakarta.validation.constraints.Min;
import lombok.RequiredArgsConstructor;
@RestController
@RequestMapping("/api/persons")
@RequiredArgsConstructor
+@Validated
public class PersonController {
private final PersonService personService;
@@ -35,15 +38,37 @@ public class PersonController {
@GetMapping
@RequirePermission(Permission.READ_ALL)
- public ResponseEntity> getPersons(
+ public ResponseEntity getPersons(
@RequestParam(required = false) String q,
- @RequestParam(required = false, defaultValue = "0") int size,
- @RequestParam(required = false) String sort) {
- if ("documentCount".equals(sort) && size > 0 && q == null) {
+ @RequestParam(required = false) PersonType type,
+ @RequestParam(required = false) Boolean familyOnly,
+ @RequestParam(required = false) Boolean hasDocuments,
+ @RequestParam(required = false) Boolean provisional,
+ // review=true reveals the import noise (transcriber view); absent/false keeps the
+ // clean reader default (familyMember OR documentCount > 0). The explicit filters AND
+ // within whichever base the review flag selects.
+ @RequestParam(required = false, defaultValue = "false") boolean review,
+ @RequestParam(required = false) String sort,
+ @RequestParam(defaultValue = "0") @Min(0) int page,
+ @RequestParam(defaultValue = "50") @Min(1) @Max(100) int size) {
+ // Legacy top-N-by-document-count path (reader dashboard): preserved, wrapped in the
+ // same envelope so /api/persons always returns one shape. It is explicitly NON-paged —
+ // the top-N query returns the complete result, so PersonSearchResult.topN reports an
+ // honest totalElements (= returned count) instead of pretending to be a page slice.
+ if ("documentCount".equals(sort) && q == null) {
int safeSize = Math.min(size, 50);
- return ResponseEntity.ok(personService.findTopByDocumentCount(safeSize));
+ List top = personService.findTopByDocumentCount(safeSize);
+ return ResponseEntity.ok(PersonSearchResult.topN(top));
}
- return ResponseEntity.ok(personService.findAll(q));
+
+ PersonFilter filter = PersonFilter.builder()
+ .type(type)
+ .familyOnly(familyOnly)
+ .hasDocuments(hasDocuments)
+ .provisional(provisional)
+ .readerDefault(!review)
+ .build();
+ return ResponseEntity.ok(personService.search(filter, page, size, q));
}
@GetMapping("/{id}")
@@ -110,6 +135,21 @@ public class PersonController {
personService.mergePersons(id, UUID.fromString(targetIdStr));
}
+ // Dedicated state transition that clears the provisional flag. A separate verb (not a
+ // mass-assignable DTO field) so provisional can never be smuggled in via create/update.
+ @PatchMapping("/{id}/confirm")
+ @RequirePermission(Permission.WRITE_ALL)
+ public ResponseEntity confirmPerson(@PathVariable UUID id) {
+ return ResponseEntity.ok(personService.confirmPerson(id));
+ }
+
+ @DeleteMapping("/{id}")
+ @ResponseStatus(HttpStatus.NO_CONTENT)
+ @RequirePermission(Permission.WRITE_ALL)
+ public void deletePerson(@PathVariable UUID id) {
+ personService.deletePerson(id);
+ }
+
// ─── Alias endpoints ────────────────────────────────────────────────────
@GetMapping("/{id}/aliases")
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonFilter.java b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonFilter.java
new file mode 100644
index 00000000..bc41214a
--- /dev/null
+++ b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonFilter.java
@@ -0,0 +1,36 @@
+package org.raddatz.familienarchiv.person;
+
+import lombok.Builder;
+
+/**
+ * The reader/triage filter set for the persons directory, threaded as one value through
+ * {@code PersonController -> PersonService -> PersonRepository}. Each reference-typed field is
+ * nullable: null means "do not constrain on this dimension". {@code readerDefault} is a plain boolean.
+ *
+ * <ul>
+ *   <li>{@code type} — restrict to a single {@link PersonType}.</li>
+ *   <li>{@code familyOnly} — when true, only {@code familyMember} persons.</li>
+ *   <li>{@code hasDocuments} — when true, only persons with {@code documentCount > 0}.</li>
+ *   <li>{@code provisional} — match the {@code Person.provisional} flag exactly.</li>
+ *   <li>{@code readerDefault} — when true, restrict to {@code familyMember OR documentCount > 0}
+ *       (the clean reader view). The explicit filters above AND with this restriction.</li>
+ * </ul>
+ */
+@Builder
+public record PersonFilter(
+ PersonType type,
+ Boolean familyOnly,
+ Boolean hasDocuments,
+ Boolean provisional,
+ boolean readerDefault
+) {
+ /** The unconstrained "show all" filter (transcriber view, no reader restriction). */
+ public static PersonFilter showAll() {
+ return PersonFilter.builder().readerDefault(false).build();
+ }
+
+ /** The clean reader default: familyMember OR documentCount > 0, no other constraints. */
+ public static PersonFilter cleanDefault() {
+ return PersonFilter.builder().readerDefault(true).build();
+ }
+}
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonRepository.java b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonRepository.java
index 6f431b74..50ff4ee9 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonRepository.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonRepository.java
@@ -32,6 +32,9 @@ public interface PersonRepository extends JpaRepository {
// Lookup by full alias string, used during ODS mass import
Optional findByAliasIgnoreCase(String alias);
+ // Lookup by the normalizer person_id, used for idempotent canonical re-import (Phase 3).
+ Optional findBySourceRef(String sourceRef);
+
// Exact first+last name match, used for filename-based sender lookup
Optional findByFirstNameIgnoreCaseAndLastNameIgnoreCase(String firstName, String lastName);
@@ -41,7 +44,7 @@ public interface PersonRepository extends JpaRepository {
SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
p.person_type AS personType,
p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
- p.family_member AS familyMember,
+ p.family_member AS familyMember, p.provisional AS provisional,
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
FROM persons p
@@ -54,7 +57,7 @@ public interface PersonRepository extends JpaRepository {
SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
p.person_type AS personType,
p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
- p.family_member AS familyMember,
+ p.family_member AS familyMember, p.provisional AS provisional,
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
FROM persons p
@@ -63,7 +66,7 @@ public interface PersonRepository extends JpaRepository {
OR LOWER(CONCAT(p.last_name,' ',COALESCE(p.first_name,''))) LIKE LOWER(CONCAT('%',:query,'%'))
OR LOWER(p.alias) LIKE LOWER(CONCAT('%',:query,'%'))
OR LOWER(a.last_name) LIKE LOWER(CONCAT('%',:query,'%'))
- GROUP BY p.id, p.title, p.first_name, p.last_name, p.person_type, p.alias, p.birth_year, p.death_year, p.notes, p.family_member
+ GROUP BY p.id, p.title, p.first_name, p.last_name, p.person_type, p.alias, p.birth_year, p.death_year, p.notes, p.family_member, p.provisional
ORDER BY p.last_name ASC, p.first_name ASC
""",
nativeQuery = true)
@@ -75,7 +78,7 @@ public interface PersonRepository extends JpaRepository {
SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
p.person_type AS personType,
p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
- p.family_member AS familyMember,
+ p.family_member AS familyMember, p.provisional AS provisional,
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
FROM persons p
@@ -85,6 +88,61 @@ public interface PersonRepository extends JpaRepository {
nativeQuery = true)
List findTopByDocumentCount(@Param("limit") int limit);
+ // --- #667: filter-aware paged directory ---
+ //
+ // Shared WHERE fragment concatenated into both the slice query (findByFilter) and the count
+ // query (countByFilter) below. Both MUST keep using this one definition so the rendered page
+ // and totalElements are filtered by identical predicates and cannot drift.
+ //
+ // How each parameter switches its predicate:
+ //   :type, :provisional, :query — a null value disables the predicate (`:param IS NULL OR …`).
+ //   :familyOnly, :hasDocuments  — null OR FALSE disables the predicate; only TRUE restricts.
+ //   :readerDefault (a plain boolean) — TRUE restricts to "familyMember OR has documents";
+ //                                      the explicit filters AND on top.
+ // documentCount is recomputed inline (not via the SELECT alias) because WHERE cannot
+ // reference a computed alias. All params are named — no string concatenation, no injection.
+ // NOTE(review): unlike the older free-text search query in this interface, :query is not
+ // matched against `a.last_name` (the joined alias rows) — confirm that is intended.
+ // NOTE(review): `%` and `_` inside :query act as LIKE wildcards; the value is not escaped.
+ String FILTER_WHERE = """
+ WHERE (CAST(:type AS text) IS NULL OR p.person_type = CAST(:type AS text))
+ AND (:familyOnly = FALSE OR :familyOnly IS NULL OR p.family_member = TRUE)
+ AND (:hasDocuments = FALSE OR :hasDocuments IS NULL OR (
+ (SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
+ + (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id)) > 0)
+ AND (:provisional IS NULL OR p.provisional = :provisional)
+ AND (:readerDefault = FALSE OR (
+ p.family_member = TRUE OR (
+ (SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
+ + (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id)) > 0))
+ AND (CAST(:query AS text) IS NULL OR
+ LOWER(CONCAT(COALESCE(p.first_name,''),' ',p.last_name)) LIKE LOWER(CONCAT('%',CAST(:query AS text),'%'))
+ OR LOWER(CONCAT(p.last_name,' ',COALESCE(p.first_name,''))) LIKE LOWER(CONCAT('%',CAST(:query AS text),'%'))
+ OR LOWER(p.alias) LIKE LOWER(CONCAT('%',CAST(:query AS text),'%')))
+ """;
+
+ /**
+ * Returns one slice of the filtered person directory, ordered by last name and then first
+ * name (both ascending). Each row carries the person columns plus {@code documentCount},
+ * computed as documents sent plus document_receivers rows for that person. Which rows match
+ * is decided entirely by the shared FILTER_WHERE fragment.
+ *
+ * @param type          person_type value to match as a string, or null for any type
+ * @param familyOnly    TRUE keeps only family members; null or FALSE disables the filter
+ * @param hasDocuments  TRUE keeps only persons with documentCount > 0; null or FALSE disables it
+ * @param provisional   required value of the provisional flag, or null for either
+ * @param readerDefault true restricts to family members or persons with at least one document
+ * @param query         free-text substring matched case-insensitively against the two name
+ *                      orders and the alias column, or null to disable
+ * @param limit         maximum number of rows to return (SQL LIMIT)
+ * @param offset        number of matching rows to skip (SQL OFFSET)
+ */
+ @Query(value = """
+ SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
+ p.person_type AS personType,
+ p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
+ p.family_member AS familyMember, p.provisional AS provisional,
+ (SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
+ + (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
+ FROM persons p
+ """ + FILTER_WHERE + """
+ ORDER BY p.last_name ASC, p.first_name ASC
+ LIMIT :limit OFFSET :offset
+ """,
+ nativeQuery = true)
+ List findByFilter(@Param("type") String type,
+ @Param("familyOnly") Boolean familyOnly,
+ @Param("hasDocuments") Boolean hasDocuments,
+ @Param("provisional") Boolean provisional,
+ @Param("readerDefault") boolean readerDefault,
+ @Param("query") String query,
+ @Param("limit") int limit,
+ @Param("offset") int offset);
+
+ /**
+ * Counts every person matching the shared FILTER_WHERE fragment, ignoring paging. Takes the
+ * same filter parameters, in the same order, as findByFilter (minus limit/offset) so the
+ * total always describes the same result set the slice was cut from.
+ *
+ * @return number of persons matching the filter
+ */
+ @Query(value = "SELECT COUNT(*) FROM persons p " + FILTER_WHERE, nativeQuery = true)
+ long countByFilter(@Param("type") String type,
+ @Param("familyOnly") Boolean familyOnly,
+ @Param("hasDocuments") Boolean hasDocuments,
+ @Param("provisional") Boolean provisional,
+ @Param("readerDefault") boolean readerDefault,
+ @Param("query") String query);
+
// --- Correspondent queries ---
@Query(value = """
@@ -136,6 +194,12 @@ public interface PersonRepository extends JpaRepository {
@Query(value = "UPDATE documents SET sender_id = :target WHERE sender_id = :source", nativeQuery = true)
void reassignSender(@Param("source") UUID source, @Param("target") UUID target);
+ // Used by deletePerson: detach a deleted person from documents they sent, so the hard
+ // delete cannot orphan a documents.sender_id FK (the column is nullable).
+ // Sets sender_id to NULL on every documents row whose sender_id equals :source. Receiver
+ // links live in document_receivers and are not touched by this statement.
+ @Modifying
+ @Query(value = "UPDATE documents SET sender_id = NULL WHERE sender_id = :source", nativeQuery = true)
+ void reassignSenderToNull(@Param("source") UUID source);
+
@Modifying
@Query(value = """
INSERT INTO document_receivers (document_id, person_id)
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonSearchResult.java b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonSearchResult.java
new file mode 100644
index 00000000..ff605770
--- /dev/null
+++ b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonSearchResult.java
@@ -0,0 +1,50 @@
+package org.raddatz.familienarchiv.person;
+
+import io.swagger.v3.oas.annotations.media.Schema;
+
+import java.util.List;
+
+/**
+ * Paged result envelope for the /api/persons list endpoint.
+ *
+ * Hand-written to mirror {@code document/DocumentSearchResult} field-for-field so the
+ * frontend sees one paged shape across the app. Deliberately NOT Spring {@code Page}
+ * (unstable serialized shape across Spring versions, noisy in OpenAPI) and deliberately
+ * NOT a reuse of the document DTO (would couple two feature modules — duplication beats
+ * coupling here).
+ *
+ * @param items         the rows of this page (or the complete result for the top-N path)
+ * @param totalElements number of rows matching the query across all pages
+ * @param pageNumber    page index as passed in by the caller
+ * @param pageSize      requested page size (or the row count for the top-N path)
+ * @param totalPages    number of pages needed to show {@code totalElements} rows
+ */
+public record PersonSearchResult(
+        @Schema(requiredMode = Schema.RequiredMode.REQUIRED)
+        List items,
+        @Schema(requiredMode = Schema.RequiredMode.REQUIRED)
+        long totalElements,
+        @Schema(requiredMode = Schema.RequiredMode.REQUIRED)
+        int pageNumber,
+        @Schema(requiredMode = Schema.RequiredMode.REQUIRED)
+        int pageSize,
+        @Schema(requiredMode = Schema.RequiredMode.REQUIRED)
+        int totalPages
+) {
+    /**
+     * Paged factory: derives {@code totalPages} from the full match count and the page size.
+     * A zero count yields zero pages so the frontend hides the pagination control.
+     *
+     * @param slice         rows of the requested page
+     * @param pageNumber    page index of {@code slice}
+     * @param pageSize      requested page size; a non-positive size reports zero pages
+     * @param totalElements number of rows matching the query across all pages
+     * @return the envelope for this page
+     */
+    public static PersonSearchResult paged(List slice, int pageNumber, int pageSize, long totalElements) {
+        // Ceiling division in long arithmetic. A non-positive page size cannot describe any
+        // page, so it reports zero pages instead of dividing by zero or producing a negative
+        // page count (e.g. size -1 with 10 elements used to yield -8).
+        int totalPages = pageSize <= 0 ? 0 : (int) ((totalElements + pageSize - 1) / pageSize);
+        return new PersonSearchResult(slice, totalElements, pageNumber, pageSize, totalPages);
+    }
+
+    /**
+     * Non-paged factory for the legacy {@code sort=documentCount} top-N dashboard path.
+     * That query returns the complete result in one shot — there is no further page
+     * to fetch — so the envelope reports reality rather than pretending to be a slice of a
+     * larger set: {@code totalElements} equals the number of rows actually returned,
+     * {@code pageSize} equals that same count, and {@code totalPages} is 1 (or 0 when empty).
+     *
+     * @param all the complete result list
+     * @return an envelope with page number 0 describing {@code all} as a single page
+     */
+    public static PersonSearchResult topN(List all) {
+        int count = all.size();
+        int totalPages = count == 0 ? 0 : 1;
+        return new PersonSearchResult(all, count, 0, count, totalPages);
+    }
+}
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonService.java b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonService.java
index 89b11ef3..175ab529 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonService.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonService.java
@@ -31,20 +31,55 @@ public class PersonService {
private final PersonRepository personRepository;
private final PersonNameAliasRepository aliasRepository;
- public List findAll(String q) {
- if (q == null) {
- return personRepository.findAllWithDocumentCount();
- }
- if (q.isBlank()) {
- return List.of();
- }
- return personRepository.searchWithDocumentCount(q.trim());
- }
-
public List findTopByDocumentCount(int limit) {
return personRepository.findTopByDocumentCount(limit);
}
+    /**
+     * Runs the filtered, paginated person-directory query. The page slice and the total
+     * both use the one shared WHERE clause (see {@link PersonRepository#FILTER_WHERE}), so
+     * totalElements describes the same result set the slice was cut from.
+     *
+     * @param filter structured filters; its {@code type} is sent as the enum name because the
+     *               native query compares against the string column
+     * @param page   zero-based page index, multiplied by {@code size} to get the row offset
+     * @param size   page size, used as the SQL LIMIT
+     * @param q      free-text query; null or blank disables text matching, otherwise trimmed
+     * @return the requested page together with the total match count
+     */
+    public PersonSearchResult search(PersonFilter filter, int page, int size, String q) {
+        String typeName = null;
+        if (filter.type() != null) {
+            typeName = filter.type().name();
+        }
+
+        String searchText = null;
+        if (q != null && !q.isBlank()) {
+            searchText = q.trim();
+        }
+
+        int firstRow = page * size;
+
+        List pageRows = personRepository.findByFilter(
+                typeName,
+                filter.familyOnly(),
+                filter.hasDocuments(),
+                filter.provisional(),
+                filter.readerDefault(),
+                searchText,
+                size,
+                firstRow);
+        long matchCount = personRepository.countByFilter(
+                typeName,
+                filter.familyOnly(),
+                filter.hasDocuments(),
+                filter.provisional(),
+                filter.readerDefault(),
+                searchText);
+
+        return PersonSearchResult.paged(pageRows, page, size, matchCount);
+    }
+
+    /**
+     * Marks a person as confirmed by clearing the {@code provisional} flag. This is a
+     * deliberate state transition exposed as {@code PATCH /api/persons/{id}/confirm},
+     * never as a mass-assignable DTO field (CWE-915).
+     *
+     * @param id id of the person to confirm; an unknown id surfaces getById's not-found error
+     * @return the saved person with {@code provisional} set to false
+     */
+    @Transactional
+    public Person confirmPerson(UUID id) {
+        Person confirmed = getById(id);
+        confirmed.setProvisional(false);
+        Person saved = personRepository.save(confirmed);
+        return saved;
+    }
+
+ /**
+ * Hard-deletes a person used by triage. Detaches the person from any documents they
+ * sent (nulls sender_id) and from any received-document references first, so the delete
+ * cannot orphan an FK and fail with a 500.
+ *
+ * @param id id of the person to delete; an unknown id raises the PERSON_NOT_FOUND error
+ *           from getById before anything is modified
+ */
+ @Transactional
+ public void deletePerson(UUID id) {
+ // Existence check only: getById throws for an unknown id; its result is not needed.
+ getById(id);
+ // Detach first, delete last: both reference clean-ups must run before the row is removed.
+ personRepository.reassignSenderToNull(id);
+ personRepository.deleteReceiverReferences(id);
+ personRepository.deleteById(id);
+ }
+
public Person getById(UUID id) {
return personRepository.findById(id)
.orElseThrow(() -> DomainException.notFound(ErrorCode.PERSON_NOT_FOUND, "Person not found: " + id));
@@ -80,6 +115,11 @@ public class PersonService {
return personRepository.findByFirstNameIgnoreCaseAndLastNameIgnoreCase(firstName, lastName);
}
+ /** Lookup by the normalizer person_id — used by the canonical importer for register-first matching. */
+ public Optional findBySourceRef(String sourceRef) {
+ return personRepository.findBySourceRef(sourceRef);
+ }
+
@Nullable
@Transactional
public Person findOrCreateByAlias(String rawName) {
@@ -115,6 +155,80 @@ public class PersonService {
});
}
+ /**
+ * Idempotent upsert keyed on {@code sourceRef} (the normalizer person_id) for the
+ * canonical importer (Phase 3, ADR-025). On first import the canonical fields are
+ * written verbatim. On re-import the human-edit-preserve precedence applies:
+ * a non-blank existing field is never overwritten, and {@code provisional} never
+ * flips back to true once a human has confirmed the person.
+ */
+ @Transactional
+ public Person upsertBySourceRef(PersonUpsertCommand cmd) {
+ return personRepository.findBySourceRef(cmd.sourceRef())
+ .map(existing -> personRepository.save(mergeCanonical(existing, cmd)))
+ .orElseGet(() -> fromCanonical(cmd));
+ }
+
+    // First-import path: persists a new person from the canonical fields and, when a
+    // non-blank maiden name is supplied, stores it as a MAIDEN_NAME alias placed after the
+    // person's current highest alias sort order.
+    private Person fromCanonical(PersonUpsertCommand cmd) {
+        // A missing person type falls back to PERSON.
+        PersonType resolvedType = cmd.personType() == null ? PersonType.PERSON : cmd.personType();
+
+        Person draft = Person.builder()
+                .sourceRef(cmd.sourceRef())
+                .firstName(blankToNull(cmd.firstName()))
+                .lastName(cmd.lastName())
+                .notes(blankToNull(cmd.notes()))
+                .birthYear(cmd.birthYear())
+                .deathYear(cmd.deathYear())
+                .familyMember(cmd.familyMember())
+                .personType(resolvedType)
+                .provisional(cmd.provisional())
+                .build();
+        Person person = personRepository.save(draft);
+
+        String maiden = blankToNull(cmd.maidenName());
+        if (maiden == null) {
+            return person;
+        }
+
+        int nextSortOrder = aliasRepository.findMaxSortOrder(person.getId()) + 1;
+        PersonNameAlias maidenAlias = PersonNameAlias.builder()
+                .person(person)
+                .lastName(maiden)
+                .type(PersonNameAliasType.MAIDEN_NAME)
+                .sortOrder(nextSortOrder)
+                .build();
+        aliasRepository.save(maidenAlias);
+        return person;
+    }
+
+    // Re-import path: folds canonical values into an already-stored person without
+    // clobbering human edits. Mutates and returns {@code existing}; the caller saves it.
+    private Person mergeCanonical(Person existing, PersonUpsertCommand cmd) {
+        // Fill-blank fields: the stored value wins whenever it is present.
+        String firstName = preferHuman(existing.getFirstName(), cmd.firstName());
+        existing.setFirstName(firstName);
+        String lastName = preferHuman(existing.getLastName(), cmd.lastName());
+        existing.setLastName(lastName);
+        String notes = preferHuman(existing.getNotes(), cmd.notes());
+        existing.setNotes(notes);
+        Integer birthYear = preferHuman(existing.getBirthYear(), cmd.birthYear());
+        existing.setBirthYear(birthYear);
+        Integer deathYear = preferHuman(existing.getDeathYear(), cmd.deathYear());
+        existing.setDeathYear(deathYear);
+
+        // The type is only replaced while the stored one is still PERSON and the command
+        // actually carries a type.
+        if (cmd.personType() != null) {
+            if (existing.getPersonType() == PersonType.PERSON) {
+                existing.setPersonType(cmd.personType());
+            }
+        }
+
+        // provisional only ever moves from true to false. A person that is already
+        // non-provisional (e.g. loaded from the register/tree before documents, see ADR-025)
+        // is never touched here, so a later document row with provisional=true for the same
+        // source_ref cannot flip it back, whatever order the rows arrive in.
+        boolean stillProvisional = existing.isProvisional();
+        if (stillProvisional) {
+            existing.setProvisional(cmd.provisional());
+        }
+        return existing;
+    }
+
+    // preferHuman: the one idiom for every fill-blank field. An existing non-blank value is
+    // returned untouched; otherwise the canonical value is used, normalised so that a blank
+    // canonical value becomes null and a non-blank one is trimmed.
+    private static String preferHuman(String existing, String canonical) {
+        boolean hasHumanValue = existing != null && !existing.isBlank();
+        if (hasHumanValue) {
+            return existing;
+        }
+        return blankToNull(canonical);
+    }
+
+    // Integer overload of the fill-blank idiom: the existing value wins whenever it is
+    // non-null, otherwise the canonical value (which may itself be null) is used.
+    private static Integer preferHuman(Integer existing, Integer canonical) {
+        if (existing == null) {
+            return canonical;
+        }
+        return existing;
+    }
+
+    // Normalises free text: null or whitespace-only input becomes null, anything else is
+    // returned trimmed.
+    private static String blankToNull(String s) {
+        if (s == null || s.isBlank()) {
+            return null;
+        }
+        return s.trim();
+    }
+
@Transactional
public Person createPerson(String firstName, String lastName, String alias) {
Person person = Person.builder()
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonSummaryDTO.java b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonSummaryDTO.java
index 68cbbe1b..9a92d257 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonSummaryDTO.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonSummaryDTO.java
@@ -18,6 +18,7 @@ public interface PersonSummaryDTO {
Integer getDeathYear();
String getNotes();
boolean isFamilyMember();
+ boolean isProvisional();
long getDocumentCount();
default String getDisplayName() {
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonUpsertCommand.java b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonUpsertCommand.java
new file mode 100644
index 00000000..63864ab6
--- /dev/null
+++ b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonUpsertCommand.java
@@ -0,0 +1,24 @@
+package org.raddatz.familienarchiv.person;
+
+import lombok.Builder;
+
+/**
+ * Importer → {@link PersonService} command for an idempotent upsert keyed on
+ * {@code sourceRef} (the normalizer's stable person_id). Carries only the canonical
+ * fields the importer owns; the service applies the human-edit-preserve precedence
+ * (see ADR-025): non-blank existing fields are never overwritten, and {@code provisional}
+ * never flips back to true once a human has confirmed a person.
+ *
+ * @param sourceRef    the normalizer person_id; the lookup key of the upsert
+ * @param firstName    canonical first name; a blank value is stored as null
+ * @param lastName     canonical last name; written as given on first import
+ * @param maidenName   canonical maiden name; when non-blank it is stored as a MAIDEN_NAME
+ *                     alias on first import
+ * @param notes        canonical notes; a blank value is stored as null
+ * @param birthYear    canonical birth year, may be null
+ * @param deathYear    canonical death year, may be null
+ * @param familyMember family-member flag written on first import
+ * @param personType   canonical person type; null is treated as PERSON on first import
+ * @param provisional  whether the importer considers this person provisional
+ */
+@Builder
+public record PersonUpsertCommand(
+ String sourceRef,
+ String firstName,
+ String lastName,
+ String maidenName,
+ String notes,
+ Integer birthYear,
+ Integer deathYear,
+ boolean familyMember,
+ PersonType personType,
+ boolean provisional
+) {}
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/person/relationship/RelationshipService.java b/backend/src/main/java/org/raddatz/familienarchiv/person/relationship/RelationshipService.java
index 032c1263..d813b8e8 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/person/relationship/RelationshipService.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/person/relationship/RelationshipService.java
@@ -31,6 +31,12 @@ import java.util.UUID;
@RequiredArgsConstructor
public class RelationshipService {
+ // Single source of truth for which relationship types are part of the family graph.
+ // Consulted by addRelationship (to set family_member on both endpoints) and by
+ // getFamilyNetwork (to filter the edges returned). FRIEND/COLLEAGUE/etc. are excluded.
+ private static final List FAMILY_RELATION_TYPES =
+ List.of(RelationType.PARENT_OF, RelationType.SPOUSE_OF, RelationType.SIBLING_OF);
+
private final PersonRelationshipRepository relationshipRepository;
private final PersonService personService;
private final RelationshipInferenceService inferenceService;
@@ -64,7 +70,7 @@ public class RelationshipService {
}
List familyEdges = relationshipRepository.findAllByRelationTypeIn(
- List.of(RelationType.PARENT_OF, RelationType.SPOUSE_OF, RelationType.SIBLING_OF));
+ FAMILY_RELATION_TYPES);
List edges = new ArrayList<>();
for (PersonRelationship r : familyEdges) {
@@ -105,15 +111,23 @@ public class RelationshipService {
.notes(blankToNull(dto.notes()))
.build();
+ PersonRelationship saved;
try {
// saveAndFlush so the unique_rel constraint violates synchronously and is
// caught here, not at commit time outside the @Transactional boundary.
- return toDTO(relationshipRepository.saveAndFlush(rel));
+ saved = relationshipRepository.saveAndFlush(rel);
} catch (DataIntegrityViolationException e) {
throw DomainException.conflict(
ErrorCode.DUPLICATE_RELATIONSHIP,
"Relationship already exists for (" + personId + ", " + relatedPerson.getId() + ", " + dto.relationType() + ")");
}
+ // Family-graph edges imply both endpoints are family members. Idempotent: the
+ // setter is a no-op when the person is already flagged, so re-imports stay clean.
+ if (FAMILY_RELATION_TYPES.contains(dto.relationType())) {
+ personService.setFamilyMember(person.getId(), true);
+ personService.setFamilyMember(relatedPerson.getId(), true);
+ }
+ return toDTO(saved);
}
@Transactional
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/tag/Tag.java b/backend/src/main/java/org/raddatz/familienarchiv/tag/Tag.java
index fc5974a6..32585eed 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/tag/Tag.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/tag/Tag.java
@@ -30,4 +30,11 @@ public class Tag {
/** Color token name (e.g. "sage"), only set on root-level tags. Null means no color. */
private String color;
+
+ /**
+ * Import identity key, keyed on the canonical tag_path. Null for manually created tags;
+ * unique among non-null values. The importer (Phase 3) uses it for idempotent re-import.
+ */
+ @Column(name = "source_ref")
+ private String sourceRef;
}
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/tag/TagRepository.java b/backend/src/main/java/org/raddatz/familienarchiv/tag/TagRepository.java
index 4a7fab90..f1b3b7ab 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/tag/TagRepository.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/tag/TagRepository.java
@@ -22,6 +22,9 @@ public interface TagRepository extends JpaRepository {
Optional findByNameIgnoreCase(String name);
+ // Lookup by the canonical tag_path (stored in tag.source_ref), used for idempotent canonical
+ // re-import (Phase 3). Empty when no tag carries that source_ref.
+ // NOTE(review): presumably a Spring Data derived query, where a null argument is matched as
+ // `source_ref IS NULL` (i.e. manually created tags) — confirm callers never pass null.
+ Optional findBySourceRef(String sourceRef);
+
List findByNameContainingIgnoreCase(String name);
/**
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/tag/TagService.java b/backend/src/main/java/org/raddatz/familienarchiv/tag/TagService.java
index a572f84f..14e1e9fa 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/tag/TagService.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/tag/TagService.java
@@ -7,6 +7,7 @@ import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
@@ -49,12 +50,37 @@ public class TagService {
.orElseThrow(() -> DomainException.notFound(ErrorCode.TAG_NOT_FOUND, "Tag not found: " + id));
}
+ /** Lookup by the canonical tag_path — used by the canonical importer to attach a document's tag. */
+ public Optional findBySourceRef(String sourceRef) {
+ return tagRepository.findBySourceRef(sourceRef);
+ }
+
public Tag findOrCreate(String name) {
String cleanName = name.trim();
return tagRepository.findByNameIgnoreCase(cleanName)
.orElseGet(() -> tagRepository.save(Tag.builder().name(cleanName).build()));
}
+    /**
+     * Idempotent upsert keyed on {@code sourceRef} (the canonical tag_path) for the
+     * Phase-3 importer (ADR-025). On first import the canonical name and parent are
+     * written; on re-import only the parent is updated, so a human-renamed tag name is
+     * preserved (the source_ref is the stable identity, the name is a human-editable label).
+     *
+     * @param sourceRef canonical tag_path identifying the tag; must be non-blank
+     * @param name      canonical name, used only when the tag is created
+     * @param parentId  id of the parent tag, written on both create and re-import
+     * @return the created or updated tag as saved
+     * @throws IllegalArgumentException when {@code sourceRef} is null or blank
+     */
+    @Transactional
+    public Tag upsertBySourceRef(String sourceRef, String name, UUID parentId) {
+        // Fail fast on a missing key. A null sourceRef handed to the finder would be matched
+        // against tags WITHOUT a source_ref (manually created ones) and re-parent an unrelated
+        // tag; a blank one would collapse all blank-ref tags into one.
+        if (sourceRef == null || sourceRef.isBlank()) {
+            throw new IllegalArgumentException("upsertBySourceRef requires a non-blank sourceRef");
+        }
+        return tagRepository.findBySourceRef(sourceRef)
+                .map(existing -> {
+                    existing.setParentId(parentId);
+                    return tagRepository.save(existing);
+                })
+                .orElseGet(() -> tagRepository.save(Tag.builder()
+                        .sourceRef(sourceRef)
+                        .name(name)
+                        .parentId(parentId)
+                        .build()));
+    }
+
@Transactional
public Tag update(UUID id, TagUpdateDTO dto) {
Tag tag = getById(id);
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/user/AdminController.java b/backend/src/main/java/org/raddatz/familienarchiv/user/AdminController.java
index 18b6c2c0..74b5d643 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/user/AdminController.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/user/AdminController.java
@@ -5,7 +5,8 @@ import org.raddatz.familienarchiv.security.Permission;
import org.raddatz.familienarchiv.security.RequirePermission;
import org.raddatz.familienarchiv.document.DocumentService;
import org.raddatz.familienarchiv.document.DocumentVersionService;
-import org.raddatz.familienarchiv.importing.MassImportService;
+import org.raddatz.familienarchiv.importing.CanonicalImportOrchestrator;
+import org.raddatz.familienarchiv.importing.ImportStatus;
import org.raddatz.familienarchiv.document.ThumbnailBackfillService;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
@@ -21,20 +22,20 @@ import lombok.RequiredArgsConstructor;
@RequiredArgsConstructor
public class AdminController {
- private final MassImportService massImportService;
+ private final CanonicalImportOrchestrator importOrchestrator;
private final DocumentService documentService;
private final DocumentVersionService documentVersionService;
private final ThumbnailBackfillService thumbnailBackfillService;
@PostMapping("/trigger-import")
- public ResponseEntity triggerMassImport() {
- massImportService.runImportAsync();
- return ResponseEntity.accepted().body(massImportService.getStatus());
+ public ResponseEntity triggerMassImport() {
+ importOrchestrator.runImportAsync();
+ return ResponseEntity.accepted().body(importOrchestrator.getStatus());
}
@GetMapping("/import-status")
- public ResponseEntity importStatus() {
- return ResponseEntity.ok(massImportService.getStatus());
+ public ResponseEntity importStatus() {
+ return ResponseEntity.ok(importOrchestrator.getStatus());
}
@PostMapping("/backfill-versions")
diff --git a/backend/src/main/resources/application.yaml b/backend/src/main/resources/application.yaml
index e74f4d41..1e4558e0 100644
--- a/backend/src/main/resources/application.yaml
+++ b/backend/src/main/resources/application.yaml
@@ -125,17 +125,10 @@ app:
password: ${APP_ADMIN_PASSWORD:admin123}
import:
- col:
- index: 0
- box: 1
- folder: 2
- sender: 3
- receivers: 5
- date: 7
- location: 9
- tags: 10
- summary: 11
- transcription: 13
+ # Directory holding the normalizer's committed canonical artifacts
+ # (canonical-{documents,persons,tag-tree}.xlsx + canonical-persons-tree.json).
+ # The loader maps columns by header name — no positional indices (see ADR-025).
+ dir: ${IMPORT_DIR:/import}
ocr:
sender-model:
diff --git a/backend/src/main/resources/db/migration/V69__import_precision_attribution_identity_schema.sql b/backend/src/main/resources/db/migration/V69__import_precision_attribution_identity_schema.sql
new file mode 100644
index 00000000..bec01873
--- /dev/null
+++ b/backend/src/main/resources/db/migration/V69__import_precision_attribution_identity_schema.sql
@@ -0,0 +1,67 @@
+-- Phase 2 of "Handling the Unknowns": the schema foundation.
+-- Consolidates every new import/precision/attribution/identity column into ONE
+-- migration with a single owner so downstream phases (importer, rendering, persons
+-- directory) compile against a finished, collision-free schema. See ADR-025.
+--
+-- This file is forward-only and immutable once shipped (Flyway checksum model):
+-- any fix goes in a later version, never an edit here.
+
+-- ─── documents: date precision, range end, raw date, raw attribution ──────────
+
+-- Range end is only set for RANGE precision (open-ended ranges allowed → end may be null).
+ALTER TABLE documents ADD COLUMN meta_date_end date;
+
+-- Original date cell, verbatim, for provenance and "as written" display (Phase 4).
+ALTER TABLE documents ADD COLUMN meta_date_raw text;
+
+-- Raw attribution preserved even when a person is linked.
+ALTER TABLE documents ADD COLUMN sender_text text;
+ALTER TABLE documents ADD COLUMN receiver_text text;
+
+-- Bound user-influenced spreadsheet text at the DB layer (mirrors transcription_blocks
+-- length cap in V18). Defense in depth against malformed/huge import cells.
+ALTER TABLE documents ADD CONSTRAINT chk_meta_date_raw_length CHECK (length(meta_date_raw) <= 10000);
+ALTER TABLE documents ADD CONSTRAINT chk_sender_text_length CHECK (length(sender_text) <= 10000);
+ALTER TABLE documents ADD CONSTRAINT chk_receiver_text_length CHECK (length(receiver_text) <= 10000);
+
+-- Precision enum — added with a DB default of 'UNKNOWN', backfilled, then made NOT NULL.
+-- The DEFAULT serves two purposes: (1) existing rows get 'UNKNOWN' immediately, and
+-- (2) raw-SQL inserts that omit the column (test fixtures, ad-hoc data loads) get a sane,
+-- CHECK-valid value instead of violating the NOT NULL constraint. JPA saves still set it
+-- explicitly via the entity's @Builder.Default = DatePrecision.UNKNOWN.
+ALTER TABLE documents ADD COLUMN meta_date_precision varchar(16) DEFAULT 'UNKNOWN';
+
+UPDATE documents
+SET meta_date_precision = CASE WHEN meta_date IS NOT NULL THEN 'DAY' ELSE 'UNKNOWN' END;
+
+ALTER TABLE documents ALTER COLUMN meta_date_precision SET NOT NULL;
+
+-- Fail-closed allowlist of the seven precision values (verbatim mirror of the
+-- normalizer's Precision enum). The DB enforces validity independent of the Java enum.
+ALTER TABLE documents ADD CONSTRAINT chk_meta_date_precision
+ CHECK (meta_date_precision IN ('DAY', 'MONTH', 'SEASON', 'YEAR', 'RANGE', 'APPROX', 'UNKNOWN'));
+
+-- A non-null range end is permitted only when precision = RANGE. A RANGE row MAY have a
+-- null end (open-ended range), so the rule is one-directional, not biconditional.
+ALTER TABLE documents ADD CONSTRAINT chk_meta_date_end_only_for_range
+ CHECK (meta_date_end IS NULL OR meta_date_precision = 'RANGE');
+
+-- For ranges with both endpoints, the end must not precede the start.
+ALTER TABLE documents ADD CONSTRAINT chk_meta_date_end_after_start
+ CHECK (meta_date_end IS NULL OR meta_date IS NULL OR meta_date_end >= meta_date);
+
+-- ─── persons: source_ref (import identity) + provisional flag ─────────────────
+
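+-- The normalizer person_id: join key for documents → persons and idempotency key for
+-- re-import. Nullable (manually created persons never have one). The unique index below
+-- enforces uniqueness among non-null values only: NULLs count as distinct from each other,
+-- so any number of rows may leave source_ref unset.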
+ALTER TABLE persons ADD COLUMN source_ref varchar(255);
+CREATE UNIQUE INDEX idx_persons_source_ref ON persons (source_ref);
+
+-- A provisional person is one the importer inferred but could not confidently identify.
+-- Stays false until Phase 3 (importer) sets it; no code path writes true in this phase.
+ALTER TABLE persons ADD COLUMN provisional boolean NOT NULL DEFAULT false;
+
+-- ─── tag: source_ref (import identity, keyed on canonical tag_path) ───────────
+
+ALTER TABLE tag ADD COLUMN source_ref varchar(255);
+CREATE UNIQUE INDEX idx_tag_source_ref ON tag (source_ref);
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/MigrationIntegrationTest.java b/backend/src/test/java/org/raddatz/familienarchiv/MigrationIntegrationTest.java
index 425d0f59..ce217e6f 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/MigrationIntegrationTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/MigrationIntegrationTest.java
@@ -479,6 +479,191 @@ class MigrationIntegrationTest {
assertThat(count).isEqualTo(1);
}
+ // ─── V69: import/precision/attribution/identity schema foundation ────────
+
+ @Test
+ void v69_metaDatePrecisionColumn_isNotNull() {
+ Integer count = jdbc.queryForObject(
+ """
+ SELECT COUNT(*) FROM information_schema.columns
+ WHERE table_schema = 'public'
+ AND table_name = 'documents'
+ AND column_name = 'meta_date_precision'
+ AND is_nullable = 'NO'
+ """,
+ Integer.class);
+ assertThat(count).isEqualTo(1);
+ }
+
+ @Test
+ void v69_backfillSql_setsDatedRowsToDayPrecision() {
+ // Re-run the migration's backfill UPDATE on a freshly dated row to prove the rule.
+ UUID docId = createDocumentWithDate("1943-05-12");
+
+ jdbc.update(V69_BACKFILL_PRECISION_SQL);
+
+ String precision = jdbc.queryForObject(
+ "SELECT meta_date_precision FROM documents WHERE id = ?", String.class, docId);
+ assertThat(precision).isEqualTo("DAY");
+ }
+
+ @Test
+ void v69_backfillSql_setsUndatedRowsToUnknownPrecision() {
+ UUID docId = createDocument(); // no meta_date
+
+ jdbc.update(V69_BACKFILL_PRECISION_SQL);
+
+ String precision = jdbc.queryForObject(
+ "SELECT meta_date_precision FROM documents WHERE id = ?", String.class, docId);
+ assertThat(precision).isEqualTo("UNKNOWN");
+ }
+
+ // Mirrors the backfill UPDATE shipped in V69; idempotent for verification.
+ private static final String V69_BACKFILL_PRECISION_SQL = """
+ UPDATE documents
+ SET meta_date_precision = CASE WHEN meta_date IS NOT NULL THEN 'DAY' ELSE 'UNKNOWN' END
+ """;
+
+ @Test
+ void v69_precisionCheck_rejectsValueOutsideEnum() {
+ UUID docId = createDocument();
+
+ assertThatThrownBy(() ->
+ jdbc.update("UPDATE documents SET meta_date_precision = 'BOGUS' WHERE id = ?", docId)
+ ).isInstanceOf(DataIntegrityViolationException.class);
+ }
+
+ @Test
+ void v69_metaDateEndCheck_rejectsNonNullEndWhenPrecisionNotRange() {
+ UUID docId = createDocumentWithDate("1943-05-12"); // helper sets only meta_date; precision is whatever createDocument() left (assumed non-RANGE — TODO confirm)
+
+ assertThatThrownBy(() ->
+ jdbc.update("UPDATE documents SET meta_date_end = '1943-06-01' WHERE id = ?", docId)
+ ).isInstanceOf(DataIntegrityViolationException.class);
+ }
+
+ @Test
+ void v69_metaDateEndCheck_allowsNonNullEndWhenPrecisionRange() {
+ UUID docId = createDocumentWithDate("1943-05-12");
+
+ int rows = jdbc.update(
+ "UPDATE documents SET meta_date_precision = 'RANGE', meta_date_end = '1943-06-01' WHERE id = ?",
+ docId);
+ assertThat(rows).isEqualTo(1);
+ }
+
+ @Test
+ void v69_metaDateEndCheck_allowsRangeWithNullEnd() {
+ // Loose semantics: the normalizer may emit an open-ended RANGE (start only).
+ UUID docId = createDocumentWithDate("1943-05-12");
+
+ int rows = jdbc.update(
+ "UPDATE documents SET meta_date_precision = 'RANGE' WHERE id = ?", docId);
+ assertThat(rows).isEqualTo(1);
+ }
+
+ @Test
+ void v69_metaDateEndCheck_allowsRangeWithBothEndpointsNull() {
+ // Fully-open RANGE: neither start (meta_date) nor end (meta_date_end) is set.
+ // Both CHECKs hold (end IS NULL passes chk_meta_date_end_only_for_range; both-null
+ // passes chk_meta_date_end_after_start), so the row survives. This locks the actual
+ // DB behavior so a future tightening to a biconditional rule is a deliberate change.
+ UUID docId = createDocument(); // null meta_date
+
+ int rows = jdbc.update(
+ "UPDATE documents SET meta_date_precision = 'RANGE' WHERE id = ?", docId);
+ assertThat(rows).isEqualTo(1);
+
+ Object metaDate = jdbc.queryForObject("SELECT meta_date FROM documents WHERE id = ?", Object.class, docId);
+ Object metaDateEnd = jdbc.queryForObject(
+ "SELECT meta_date_end FROM documents WHERE id = ?", Object.class, docId);
+ assertThat(metaDate).isNull();
+ assertThat(metaDateEnd).isNull();
+ }
+
+ @Test
+ void v69_rangeOrderCheck_rejectsEndBeforeStart() {
+ UUID docId = createDocumentWithDate("1943-05-12");
+
+ assertThatThrownBy(() ->
+ jdbc.update(
+ "UPDATE documents SET meta_date_precision = 'RANGE', meta_date_end = '1943-01-01' WHERE id = ?",
+ docId)
+ ).isInstanceOf(DataIntegrityViolationException.class);
+ }
+
+ @Test
+ void v69_metaDateRawCheck_rejectsOverlongText() {
+ UUID docId = createDocument();
+ String tooLong = "x".repeat(10001);
+
+ assertThatThrownBy(() ->
+ jdbc.update("UPDATE documents SET meta_date_raw = ? WHERE id = ?", tooLong, docId)
+ ).isInstanceOf(DataIntegrityViolationException.class);
+ }
+
+ @Test
+ void v69_senderTextAndReceiverText_storeRawAttribution() {
+ UUID docId = createDocument();
+
+ int rows = jdbc.update(
+ "UPDATE documents SET sender_text = 'Oma Anna', receiver_text = 'Tante Grete' WHERE id = ?",
+ docId);
+ assertThat(rows).isEqualTo(1);
+ }
+
+ @Test
+ @Transactional(propagation = Propagation.NOT_SUPPORTED) // executes non-transactionally, so nothing is rolled back for us — the finally block cleans up
+ void v69_personsSourceRef_uniqueIndexRejectsDuplicate() {
+ jdbc.update(
+ "INSERT INTO persons (id, last_name, source_ref) VALUES (gen_random_uuid(), 'A', 'person:dup')");
+ try {
+ assertThatThrownBy(() ->
+ jdbc.update(
+ "INSERT INTO persons (id, last_name, source_ref) VALUES (gen_random_uuid(), 'B', 'person:dup')")
+ ).isInstanceOf(DataIntegrityViolationException.class); // second row reuses source_ref 'person:dup'
+ } finally {
+ jdbc.update("DELETE FROM persons WHERE source_ref = 'person:dup'");
+ }
+ }
+
+ @Test
+ @Transactional(propagation = Propagation.NOT_SUPPORTED) // executes non-transactionally, so the two persons are removed by hand in finally
+ void v69_personsSourceRef_allowsMultipleNulls() {
+ UUID a = createPerson("Null", "RefA"); // presumably leaves source_ref unset; the assertions below confirm it
+ UUID b = createPerson("Null", "RefB");
+ try {
+ String refA = jdbc.queryForObject("SELECT source_ref FROM persons WHERE id = ?", String.class, a);
+ String refB = jdbc.queryForObject("SELECT source_ref FROM persons WHERE id = ?", String.class, b);
+ assertThat(refA).isNull(); // both rows exist with a NULL source_ref, i.e. the unique index did not reject the second one
+ assertThat(refB).isNull();
+ } finally {
+ jdbc.update("DELETE FROM persons WHERE id IN (?, ?)", a, b);
+ }
+ }
+
+ @Test
+ void v69_personsProvisional_defaultsToFalse() {
+ UUID id = createPerson("Provisional", "Default");
+
+ Boolean provisional = jdbc.queryForObject(
+ "SELECT provisional FROM persons WHERE id = ?", Boolean.class, id);
+ assertThat(provisional).isFalse();
+ }
+
+ @Test
+ @Transactional(propagation = Propagation.NOT_SUPPORTED) // executes non-transactionally, so nothing is rolled back for us — the finally block cleans up
+ void v69_tagSourceRef_uniqueIndexRejectsDuplicate() {
+ jdbc.update("INSERT INTO tag (id, name, source_ref) VALUES (gen_random_uuid(), 'TagDupA', 'tag:dup')");
+ try {
+ assertThatThrownBy(() ->
+ jdbc.update("INSERT INTO tag (id, name, source_ref) VALUES (gen_random_uuid(), 'TagDupB', 'tag:dup')")
+ ).isInstanceOf(DataIntegrityViolationException.class); // different name, same source_ref 'tag:dup'
+ } finally {
+ jdbc.update("DELETE FROM tag WHERE source_ref = 'tag:dup'");
+ }
+ }
+
// ─── helpers ─────────────────────────────────────────────────────────────
private UUID createPerson(String firstName, String lastName) {
@@ -504,6 +689,12 @@ class MigrationIntegrationTest {
return doc.getId();
}
+ private UUID createDocumentWithDate(String isoDate) { // createDocument() plus a meta_date; no other column is touched here
+ UUID id = createDocument();
+ jdbc.update("UPDATE documents SET meta_date = ?::date WHERE id = ?", isoDate, id); // ?::date casts the bound ISO string to a SQL date
+ return id;
+ }
+
private UUID insertAnnotation(UUID docId) {
UUID id = UUID.randomUUID();
jdbc.update("""
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentControllerTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentControllerTest.java
index fe15ba3b..7c9b28a1 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentControllerTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentControllerTest.java
@@ -1,6 +1,7 @@
package org.raddatz.familienarchiv.document;
import org.junit.jupiter.api.Test;
+import org.mockito.ArgumentCaptor;
import org.raddatz.familienarchiv.document.DocumentBatchMetadataDTO;
import org.raddatz.familienarchiv.document.DocumentSearchResult;
import org.raddatz.familienarchiv.document.DocumentVersionSummary;
@@ -35,7 +36,9 @@ import java.util.List;
import java.util.Optional;
import java.util.UUID;
+import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.verify;
@@ -73,23 +76,69 @@ class DocumentControllerTest {
@Test
@WithMockUser
void search_returns200_whenAuthenticated() throws Exception {
- when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
+ when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), any()))
.thenReturn(DocumentSearchResult.of(List.of()));
mockMvc.perform(get("/api/documents/search"))
.andExpect(status().isOk());
}
+ @Test
+ @WithMockUser
+ void search_undatedTrue_isReachableByAuthenticatedUser() throws Exception {
+ // The read GET must stay reachable for READ_ALL users — guards against a
+ // future refactor accidentally write-guarding the undated triage path (#668).
+ when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), any()))
+ .thenReturn(DocumentSearchResult.of(List.of()));
+
+ mockMvc.perform(get("/api/documents/search").param("undated", "true"))
+ .andExpect(status().isOk());
+ }
+
+ @Test
+ void search_undatedTrue_returns401_whenUnauthenticated() throws Exception {
+ mockMvc.perform(get("/api/documents/search").param("undated", "true"))
+ .andExpect(status().isUnauthorized());
+ }
+
+    @Test
+    @WithMockUser
+    void search_undatedTrue_isForwardedToServiceAsTrue() throws Exception {
+        ArgumentCaptor<Boolean> undatedCaptor = ArgumentCaptor.forClass(Boolean.class); // typed: a raw captor yields Object, which has no isTrue()
+        when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), any()))
+                .thenReturn(DocumentSearchResult.of(List.of()));
+
+        mockMvc.perform(get("/api/documents/search").param("undated", "true"))
+                .andExpect(status().isOk());
+
+        verify(documentService).searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), undatedCaptor.capture(), any()); // 12th argument = undated flag
+        assertThat(undatedCaptor.getValue()).isTrue();
+    }
+
+    @Test
+    @WithMockUser
+    void search_withoutUndatedParam_forwardsFalseToService() throws Exception {
+        ArgumentCaptor<Boolean> undatedCaptor = ArgumentCaptor.forClass(Boolean.class); // typed: a raw captor yields Object, which has no isFalse()
+        when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), any()))
+                .thenReturn(DocumentSearchResult.of(List.of()));
+
+        mockMvc.perform(get("/api/documents/search")) // no "undated" request parameter at all
+                .andExpect(status().isOk());
+
+        verify(documentService).searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), undatedCaptor.capture(), any()); // 12th argument = undated flag
+        assertThat(undatedCaptor.getValue()).isFalse();
+    }
+
@Test
@WithMockUser
void search_withStatusParam_passesItToService() throws Exception {
- when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), eq(DocumentStatus.REVIEWED), any(), any(), any(), any()))
+ when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), eq(DocumentStatus.REVIEWED), any(), any(), any(), anyBoolean(), any()))
.thenReturn(DocumentSearchResult.of(List.of()));
mockMvc.perform(get("/api/documents/search").param("status", "REVIEWED"))
.andExpect(status().isOk());
- verify(documentService).searchDocuments(any(), any(), any(), any(), any(), any(), any(), eq(DocumentStatus.REVIEWED), any(), any(), any(), any());
+ verify(documentService).searchDocuments(any(), any(), any(), any(), any(), any(), any(), eq(DocumentStatus.REVIEWED), any(), any(), any(), anyBoolean(), any());
}
@Test
@@ -116,7 +165,7 @@ class DocumentControllerTest {
@Test
@WithMockUser
void search_responseContainsTotalCount() throws Exception {
- when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
+ when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), any()))
.thenReturn(DocumentSearchResult.of(List.of()));
mockMvc.perform(get("/api/documents/search"))
@@ -131,9 +180,10 @@ class DocumentControllerTest {
UUID docId = UUID.randomUUID();
var matchData = new SearchMatchData(
"Er schrieb einen langen Brief", List.of(), false, List.of(), List.of(), List.of(), null, List.of());
- when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
+ when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), any()))
.thenReturn(DocumentSearchResult.of(List.of(new DocumentListItem(
- docId, "Brief an Anna", "brief.pdf", null, null, null,
+ docId, "Brief an Anna", "brief.pdf", null, null,
+ DatePrecision.UNKNOWN, null, null,
List.of(), List.of(), null, null, null, null,
0, List.of(), matchData,
LocalDateTime.of(2026, 1, 15, 10, 0), LocalDateTime.of(2026, 1, 15, 10, 0)))));
@@ -150,9 +200,10 @@ class DocumentControllerTest {
void search_returns_flat_item_with_id_and_without_sensitive_fields() throws Exception {
UUID docId = UUID.randomUUID();
var matchData = new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of(), null, List.of());
- when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
+ when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), any()))
.thenReturn(DocumentSearchResult.of(List.of(new DocumentListItem(
- docId, "Brief an Anna", "brief.pdf", null, null, null,
+ docId, "Brief an Anna", "brief.pdf", null, null,
+ DatePrecision.UNKNOWN, null, null,
List.of(), List.of(), null, null, null, null,
0, List.of(), matchData,
LocalDateTime.of(2026, 1, 15, 10, 0), LocalDateTime.of(2026, 1, 15, 10, 0)))));
@@ -172,7 +223,7 @@ class DocumentControllerTest {
@Test
@WithMockUser
void search_responseExposesPagingFields() throws Exception {
- when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
+ when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), any()))
.thenReturn(DocumentSearchResult.of(List.of()));
mockMvc.perform(get("/api/documents/search"))
@@ -217,7 +268,7 @@ class DocumentControllerTest {
@Test
@WithMockUser
void search_passesPageRequestToService() throws Exception {
- when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
+ when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), any()))
.thenReturn(DocumentSearchResult.of(List.of()));
mockMvc.perform(get("/api/documents/search").param("page", "2").param("size", "25"))
@@ -225,7 +276,7 @@ class DocumentControllerTest {
org.mockito.ArgumentCaptor captor =
org.mockito.ArgumentCaptor.forClass(org.springframework.data.domain.Pageable.class);
- verify(documentService).searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), captor.capture());
+ verify(documentService).searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean(), captor.capture());
org.springframework.data.domain.Pageable pageable = captor.getValue();
org.assertj.core.api.Assertions.assertThat(pageable.getPageNumber()).isEqualTo(2);
org.assertj.core.api.Assertions.assertThat(pageable.getPageSize()).isEqualTo(25);
@@ -294,6 +345,34 @@ class DocumentControllerTest {
.andExpect(status().isOk());
}
+    @Test
+    @WithMockUser(authorities = "WRITE_ALL")
+    void updateDocument_bindsPrecisionFormFields_toDTO() throws Exception {
+        // Pins the wire contract: the edit form's metaDatePrecision / metaDateEnd /
+        // metaDateRaw multipart field names must bind to DocumentUpdateDTO. A rename
+        // on either side silently drops the precision edit; this captures the DTO.
+        UUID id = UUID.randomUUID();
+        Document doc = Document.builder().id(id).title("Brief").originalFilename("brief.pdf").build();
+        when(userService.findByEmail(any())).thenReturn(AppUser.builder().id(UUID.randomUUID()).build());
+
+        ArgumentCaptor<DocumentUpdateDTO> captor =
+                ArgumentCaptor.forClass(DocumentUpdateDTO.class); // typed so getValue() yields the DTO; a raw captor would return Object
+        when(documentService.updateDocument(eq(id), captor.capture(), any(), any())).thenReturn(doc);
+
+        mockMvc.perform(multipart("/api/documents/" + id)
+                        .param("metaDatePrecision", "RANGE")
+                        .param("metaDateEnd", "1917-01-11")
+                        .param("metaDateRaw", "10.–11. Januar 1917")
+                        .with(req -> { req.setMethod("PUT"); return req; }).with(csrf())) // multipart() builds a POST; force the method to PUT
+                .andExpect(status().isOk());
+
+        DocumentUpdateDTO bound = captor.getValue();
+        assertThat(bound.getMetaDatePrecision()).isEqualTo(DatePrecision.RANGE);
+        assertThat(bound.getMetaDateEnd())
+                .isEqualTo(java.time.LocalDate.of(1917, 1, 11));
+        assertThat(bound.getMetaDateRaw()).isEqualTo("10.–11. Januar 1917");
+    }
+
// ─── DELETE /api/documents/{id} ──────────────────────────────────────────
@Test
@@ -1115,7 +1194,7 @@ class DocumentControllerTest {
void getDocumentIds_returns200_andDelegatesToService() throws Exception {
when(userService.findByEmail(any())).thenReturn(AppUser.builder().id(UUID.randomUUID()).build());
UUID id = UUID.randomUUID();
- when(documentService.findIdsForFilter(any(), any(), any(), any(), any(), any(), any(), any(), any()))
+ when(documentService.findIdsForFilter(any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean()))
.thenReturn(List.of(id));
mockMvc.perform(get("/api/documents/ids"))
@@ -1128,13 +1207,13 @@ class DocumentControllerTest {
void getDocumentIds_passesSenderIdParamToService() throws Exception {
when(userService.findByEmail(any())).thenReturn(AppUser.builder().id(UUID.randomUUID()).build());
UUID senderId = UUID.randomUUID();
- when(documentService.findIdsForFilter(any(), any(), any(), eq(senderId), any(), any(), any(), any(), any()))
+ when(documentService.findIdsForFilter(any(), any(), any(), eq(senderId), any(), any(), any(), any(), any(), anyBoolean()))
.thenReturn(List.of());
mockMvc.perform(get("/api/documents/ids").param("senderId", senderId.toString()))
.andExpect(status().isOk());
- verify(documentService).findIdsForFilter(any(), any(), any(), eq(senderId), any(), any(), any(), any(), any());
+ verify(documentService).findIdsForFilter(any(), any(), any(), eq(senderId), any(), any(), any(), any(), any(), anyBoolean());
}
@Test
@@ -1144,7 +1223,7 @@ class DocumentControllerTest {
// Service returns 5001 IDs — one over BULK_EDIT_FILTER_MAX_IDS (5000).
java.util.List tooMany = new java.util.ArrayList<>(5001);
for (int i = 0; i < 5001; i++) tooMany.add(UUID.randomUUID());
- when(documentService.findIdsForFilter(any(), any(), any(), any(), any(), any(), any(), any(), any()))
+ when(documentService.findIdsForFilter(any(), any(), any(), any(), any(), any(), any(), any(), any(), anyBoolean()))
.thenReturn(tooMany);
mockMvc.perform(get("/api/documents/ids"))
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentLazyLoadingTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentLazyLoadingTest.java
index 62a2d843..1b5a4b1e 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentLazyLoadingTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentLazyLoadingTest.java
@@ -123,8 +123,7 @@ class DocumentLazyLoadingTest {
DocumentSearchResult result = documentService.searchDocuments(
null, null, null, null, null, null, null, null,
- DocumentSort.RECEIVER, "asc", null,
- PageRequest.of(0, 20));
+ DocumentSort.RECEIVER, "asc", null, false, PageRequest.of(0, 20));
assertThat(result.totalElements()).isGreaterThan(0);
assertThatCode(() ->
result.items().forEach(i -> { if (i.sender() != null) i.sender().getLastName(); }))
@@ -139,8 +138,7 @@ class DocumentLazyLoadingTest {
assertThatCode(() -> documentService.searchDocuments(
null, null, null, null, null, null, null, null,
- DocumentSort.SENDER, "asc", null,
- PageRequest.of(0, 20)))
+ DocumentSort.SENDER, "asc", null, false, PageRequest.of(0, 20)))
.doesNotThrowAnyException();
}
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentListItemIntegrationTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentListItemIntegrationTest.java
index 4c532882..3d0e4b90 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentListItemIntegrationTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentListItemIntegrationTest.java
@@ -56,8 +56,7 @@ class DocumentListItemIntegrationTest {
assertThatCode(() -> documentService.searchDocuments(
null, null, null, null, null, null, null, null,
- DocumentSort.DATE, "DESC", null,
- PageRequest.of(0, 50)))
+ DocumentSort.DATE, "DESC", null, false, PageRequest.of(0, 50)))
.doesNotThrowAnyException();
}
@@ -72,8 +71,7 @@ class DocumentListItemIntegrationTest {
DocumentSearchResult result = documentService.searchDocuments(
null, null, null, null, null, null, null, null,
- DocumentSort.DATE, "DESC", null,
- PageRequest.of(0, 50));
+ DocumentSort.DATE, "DESC", null, false, PageRequest.of(0, 50));
assertThat(result.totalElements()).isGreaterThan(0);
DocumentListItem item = result.items().get(0);
@@ -81,6 +79,27 @@ class DocumentListItemIntegrationTest {
assertThat(item.title()).isEqualTo("Kurrent Brief");
}
+ @Test
+ void search_listItem_carriesMetaDatePrecisionAndEnd() {
+ documentRepository.save(Document.builder()
+ .title("Range Brief")
+ .originalFilename("range.pdf")
+ .status(DocumentStatus.UPLOADED)
+ .documentDate(java.time.LocalDate.of(1943, 1, 1))
+ .metaDatePrecision(DatePrecision.RANGE)
+ .metaDateEnd(java.time.LocalDate.of(1943, 12, 31))
+ .build());
+
+ DocumentSearchResult result = documentService.searchDocuments(
+ null, null, null, null, null, null, null, null,
+ DocumentSort.DATE, "DESC", null, false, PageRequest.of(0, 50));
+
+ DocumentListItem item = result.items().stream()
+ .filter(i -> i.title().equals("Range Brief")).findFirst().orElseThrow();
+ assertThat(item.metaDatePrecision()).isEqualTo(DatePrecision.RANGE);
+ assertThat(item.metaDateEnd()).isEqualTo(java.time.LocalDate.of(1943, 12, 31));
+ }
+
@Test
void detail_stillReturnsTrainingLabels() {
Document saved = documentRepository.save(Document.builder()
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchPagedIntegrationTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchPagedIntegrationTest.java
index c61c38af..3d65cbac 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchPagedIntegrationTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchPagedIntegrationTest.java
@@ -62,8 +62,7 @@ class DocumentSearchPagedIntegrationTest {
void search_firstPage_returnsExactlyPageSizeItems_andCorrectTotalElements() {
DocumentSearchResult result = documentService.searchDocuments(
null, null, null, null, null, null, null, null,
- DocumentSort.DATE, "DESC", null,
- PageRequest.of(0, 50));
+ DocumentSort.DATE, "DESC", null, false, PageRequest.of(0, 50));
assertThat(result.items()).hasSize(50);
assertThat(result.totalElements()).isEqualTo(FIXTURE_SIZE);
@@ -76,8 +75,7 @@ class DocumentSearchPagedIntegrationTest {
void search_lastPartialPage_returnsRemainingItems() {
DocumentSearchResult result = documentService.searchDocuments(
null, null, null, null, null, null, null, null,
- DocumentSort.DATE, "DESC", null,
- PageRequest.of(2, 50));
+ DocumentSort.DATE, "DESC", null, false, PageRequest.of(2, 50));
// Page 2 (offset 100) of 120 docs → exactly 20 items on the tail.
assertThat(result.items()).hasSize(20);
@@ -89,8 +87,7 @@ class DocumentSearchPagedIntegrationTest {
void search_pageBeyondLast_returnsEmptyContent_totalElementsStillCorrect() {
DocumentSearchResult result = documentService.searchDocuments(
null, null, null, null, null, null, null, null,
- DocumentSort.DATE, "DESC", null,
- PageRequest.of(99, 50));
+ DocumentSort.DATE, "DESC", null, false, PageRequest.of(99, 50));
assertThat(result.items()).isEmpty();
assertThat(result.totalElements()).isEqualTo(FIXTURE_SIZE);
@@ -103,8 +100,7 @@ class DocumentSearchPagedIntegrationTest {
// returns the correct total from a real repository fetch.
DocumentSearchResult result = documentService.searchDocuments(
null, null, null, null, null, null, null, null,
- DocumentSort.SENDER, "asc", null,
- PageRequest.of(1, 50));
+ DocumentSort.SENDER, "asc", null, false, PageRequest.of(1, 50));
assertThat(result.items()).hasSize(50);
assertThat(result.totalElements()).isEqualTo(FIXTURE_SIZE);
@@ -112,16 +108,91 @@ class DocumentSearchPagedIntegrationTest {
assertThat(result.totalPages()).isEqualTo(3);
}
+ @Test
+ void search_undatedCount_isGlobalFilteredTotal_notPageSlice() {
+ // Seed 70 undated docs on top of the 120 dated ones. With a 50-per-page
+ // window the undated rows span multiple pages, so a page-local count could
+ // never exceed 50 — the global count must be the full 70 (issue #668).
+ int undatedTotal = 70;
+ for (int i = 0; i < undatedTotal; i++) {
+ documentRepository.save(Document.builder()
+ .title("Undatiert-" + String.format("%03d", i))
+ .originalFilename("undatiert-" + i + ".pdf")
+ .status(DocumentStatus.UPLOADED)
+ .metaDatePrecision(DatePrecision.UNKNOWN)
+ .documentDate(null)
+ .build());
+ }
+
+ DocumentSearchResult result = documentService.searchDocuments(
+ null, null, null, null, null, null, null, null,
+ DocumentSort.DATE, "DESC", null, false, PageRequest.of(0, 50));
+
+ // Global undated count is the full undated total, independent of page size.
+ assertThat(result.undatedCount()).isEqualTo(undatedTotal);
+ // Total matches both dated + undated (no undated-only filter applied).
+ assertThat(result.totalElements()).isEqualTo(FIXTURE_SIZE + undatedTotal);
+ // The first DATE-DESC page is all dated rows (nulls last), so a page-local
+ // tally would report 0 undated — proving the count is not page-derived.
+ assertThat(result.items()).allMatch(item -> item.documentDate() != null);
+ }
+
+ @Test
+ void search_undatedCount_ignoresUndatedOnlyToggle() {
+ // The "Nur undatierte" toggle must not skew the count: whether undated=true or
+ // false, the global undated count for the same filter is identical (issue #668).
+ int undatedTotal = 12;
+ for (int i = 0; i < undatedTotal; i++) {
+ documentRepository.save(Document.builder()
+ .title("U-" + i)
+ .originalFilename("u-" + i + ".pdf")
+ .status(DocumentStatus.UPLOADED)
+ .metaDatePrecision(DatePrecision.UNKNOWN)
+ .documentDate(null)
+ .build());
+ }
+
+ DocumentSearchResult unfiltered = documentService.searchDocuments(
+ null, null, null, null, null, null, null, null,
+ DocumentSort.DATE, "DESC", null, false, PageRequest.of(0, 50));
+ DocumentSearchResult undatedOnly = documentService.searchDocuments(
+ null, null, null, null, null, null, null, null,
+ DocumentSort.DATE, "DESC", null, true, PageRequest.of(0, 50));
+
+ assertThat(unfiltered.undatedCount()).isEqualTo(undatedTotal);
+ assertThat(undatedOnly.undatedCount()).isEqualTo(undatedTotal);
+ }
+
+ @Test
+ void search_undatedCount_isZero_insideDateRange() {
+ // A from/to range excludes undated rows by the collision rule (#668), so the
+ // global undated count inside a range is legitimately 0 even when undated docs exist.
+ for (int i = 0; i < 5; i++) {
+ documentRepository.save(Document.builder()
+ .title("U-range-" + i)
+ .originalFilename("u-range-" + i + ".pdf")
+ .status(DocumentStatus.UPLOADED)
+ .metaDatePrecision(DatePrecision.UNKNOWN)
+ .documentDate(null)
+ .build());
+ }
+
+ DocumentSearchResult result = documentService.searchDocuments(
+ null, LocalDate.of(1900, 1, 1), LocalDate.of(2000, 12, 31),
+ null, null, null, null, null,
+ DocumentSort.DATE, "DESC", null, false, PageRequest.of(0, 50));
+
+ assertThat(result.undatedCount()).isZero();
+ }
+
@Test
void search_differentPagesReturnDisjointSlices() {
DocumentSearchResult page0 = documentService.searchDocuments(
null, null, null, null, null, null, null, null,
- DocumentSort.DATE, "DESC", null,
- PageRequest.of(0, 50));
+ DocumentSort.DATE, "DESC", null, false, PageRequest.of(0, 50));
DocumentSearchResult page1 = documentService.searchDocuments(
null, null, null, null, null, null, null, null,
- DocumentSort.DATE, "DESC", null,
- PageRequest.of(1, 50));
+ DocumentSort.DATE, "DESC", null, false, PageRequest.of(1, 50));
// No document id should appear on both pages — slicing must be exclusive.
var idsOnPage0 = page0.items().stream()
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchResultTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchResultTest.java
index a487e272..09a8613b 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchResultTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchResultTest.java
@@ -15,7 +15,8 @@ class DocumentSearchResultTest {
private DocumentListItem item(UUID docId) {
return new DocumentListItem(
- docId, "Test", "test.pdf", null, null, null,
+ docId, "Test", "test.pdf", null, null,
+ DatePrecision.UNKNOWN, null, null,
List.of(), List.of(), null, null, null, null,
0, List.of(), SearchMatchData.empty(),
LocalDateTime.of(2026, 1, 15, 10, 0), LocalDateTime.of(2026, 1, 15, 10, 0));
@@ -66,7 +67,8 @@ class DocumentSearchResultTest {
UUID id = UUID.randomUUID();
ActivityActorDTO actor = new ActivityActorDTO("AB", "#f00", "Anna Braun");
DocumentListItem item = new DocumentListItem(
- id, "T", "t.pdf", null, null, null,
+ id, "T", "t.pdf", null, null,
+ DatePrecision.UNKNOWN, null, null,
List.of(), List.of(), null, null, null, null,
75, List.of(actor), SearchMatchData.empty(),
LocalDateTime.of(2026, 1, 15, 10, 0), LocalDateTime.of(2026, 1, 15, 10, 0));
@@ -100,4 +102,32 @@ class DocumentSearchResultTest {
assertThat(schema.requiredMode()).isEqualTo(Schema.RequiredMode.REQUIRED);
}
}
+
+ @Test
+ void undatedCount_component_is_annotated_as_required_in_openapi_schema() throws NoSuchFieldException {
+ Schema schema = DocumentSearchResult.class.getDeclaredField("undatedCount").getAnnotation(Schema.class);
+ assertThat(schema).isNotNull();
+ assertThat(schema.requiredMode()).isEqualTo(Schema.RequiredMode.REQUIRED);
+ }
+
+ @Test
+ void factories_default_undatedCount_to_zero() {
+ assertThat(DocumentSearchResult.of(List.of()).undatedCount()).isZero();
+ assertThat(DocumentSearchResult.paged(List.of(), PageRequest.of(0, 50), 0L).undatedCount()).isZero();
+ }
+
+ @Test
+ void withUndatedCount_overlays_count_and_preserves_other_fields() {
+ DocumentSearchResult base = DocumentSearchResult.paged(
+ List.of(item(UUID.randomUUID())), PageRequest.of(1, 50), 120L);
+
+ DocumentSearchResult withCount = base.withUndatedCount(7L);
+
+ assertThat(withCount.undatedCount()).isEqualTo(7L);
+ assertThat(withCount.items()).isEqualTo(base.items());
+ assertThat(withCount.totalElements()).isEqualTo(120L);
+ assertThat(withCount.pageNumber()).isEqualTo(1);
+ assertThat(withCount.pageSize()).isEqualTo(50);
+ assertThat(withCount.totalPages()).isEqualTo(3);
+ }
}
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceSortTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceSortTest.java
index abf6e389..c3d00619 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceSortTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceSortTest.java
@@ -67,7 +67,7 @@ class DocumentServiceSortTest {
.thenReturn(new PageImpl<>(List.of(newer, older)));
DocumentSearchResult result = documentService.searchDocuments(
- "Brief", null, null, null, null, null, null, null, DocumentSort.DATE, "DESC", null, PAGE);
+ "Brief", null, null, null, null, null, null, null, DocumentSort.DATE, "DESC", null, false, PAGE);
assertThat(result.items()).hasSize(2);
assertThat(result.items().get(0).id()).isEqualTo(id2); // newer first
@@ -84,7 +84,7 @@ class DocumentServiceSortTest {
.thenReturn(List.of(doc(id1)));
documentService.searchDocuments(
- "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, PAGE);
+ "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, false, PAGE);
verify(documentRepository).findFtsPageRaw(anyString(), anyInt(), anyInt());
verify(documentRepository, never()).findAllMatchingIdsByFts(anyString());
@@ -102,7 +102,7 @@ class DocumentServiceSortTest {
when(documentRepository.findAllById(any())).thenReturn(List.of(doc(id2), doc(id1))); // unordered from JPA
DocumentSearchResult result = documentService.searchDocuments(
- "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, PAGE);
+ "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, false, PAGE);
assertThat(result.items().get(0).id()).isEqualTo(id1);
}
@@ -119,7 +119,7 @@ class DocumentServiceSortTest {
when(documentRepository.findAllById(any())).thenReturn(List.of(doc(id2), doc(id1)));
DocumentSearchResult result = documentService.searchDocuments(
- "Brief", null, null, null, null, null, null, null, null, null, null, PAGE);
+ "Brief", null, null, null, null, null, null, null, null, null, null, false, PAGE);
assertThat(result.items().get(0).id()).isEqualTo(id1);
}
@@ -133,7 +133,7 @@ class DocumentServiceSortTest {
DocumentSearchResult result = documentService.searchDocuments(
"Brief", null, null, null, null, null, null, null,
- DocumentSort.RELEVANCE, null, null, hugePage);
+ DocumentSort.RELEVANCE, null, null, false, hugePage);
assertThat(result.items()).isEmpty();
verify(documentRepository, never()).findFtsPageRaw(anyString(), anyInt(), anyInt());
@@ -153,7 +153,7 @@ class DocumentServiceSortTest {
DocumentSearchResult result = documentService.searchDocuments(
"Brief", null, null, null, null, null, null, null,
- DocumentSort.RELEVANCE, null, null, PAGE);
+ DocumentSort.RELEVANCE, null, null, false, PAGE);
assertThat(result.items()).hasSize(1);
assertThat(result.items().get(0).id()).isEqualTo(uuidId);
@@ -173,7 +173,7 @@ class DocumentServiceSortTest {
// sender filter is active → triggers in-memory path, not findFtsPageRaw
LocalDate from = LocalDate.of(1900, 1, 1);
documentService.searchDocuments(
- "Brief", from, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, PAGE);
+ "Brief", from, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, false, PAGE);
verify(documentRepository, never()).findFtsPageRaw(anyString(), anyInt(), anyInt());
verify(documentRepository).findAllMatchingIdsByFts("Brief");
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java
index 8ef7a6f2..04b84fba 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java
@@ -47,6 +47,8 @@ import java.util.UUID;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyInt;
+import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.ArgumentMatchers.isNull;
import static org.mockito.Mockito.*;
@@ -144,6 +146,53 @@ class DocumentServiceTest {
assertThat(doc.getArchiveFolder()).isEqualTo("Mappe B");
}
+ @Test
+ void updateDocument_persistsDatePrecisionEndAndRaw() throws Exception {
+ // The stored document is built without any explicit precision fields.
+ UUID documentId = UUID.randomUUID();
+ Document stored = Document.builder()
+ .id(documentId)
+ .receivers(new HashSet<>())
+ .tags(new HashSet<>())
+ .build();
+ when(documentRepository.findById(documentId)).thenReturn(Optional.of(stored));
+ when(documentRepository.save(any())).thenReturn(stored);
+
+ // A two-day range: start date, precision, range end and the raw text as written.
+ LocalDate rangeEnd = LocalDate.of(1917, 1, 11);
+ String rawDate = "10.–11. Januar 1917";
+ DocumentUpdateDTO update = new DocumentUpdateDTO();
+ update.setDocumentDate(LocalDate.of(1917, 1, 10));
+ update.setMetaDatePrecision(DatePrecision.RANGE);
+ update.setMetaDateEnd(rangeEnd);
+ update.setMetaDateRaw(rawDate);
+
+ documentService.updateDocument(documentId, update, null, null);
+
+ // All three precision-related values from the DTO must end up on the entity.
+ assertThat(stored.getMetaDatePrecision()).isEqualTo(DatePrecision.RANGE);
+ assertThat(stored.getMetaDateEnd()).isEqualTo(rangeEnd);
+ assertThat(stored.getMetaDateRaw()).isEqualTo(rawDate);
+ }
+
+ @Test
+ void updateDocument_preservesStoredPrecision_whenDtoOmitsIt() throws Exception {
+ // Scenario: a user fixes something unrelated (here the location) and the form
+ // does not send the three precision fields, so they reach the service as null.
+ // The service must keep what is stored instead of overwriting or inventing values.
+ LocalDate storedEnd = LocalDate.of(1916, 6, 30);
+ String storedRaw = "Juni 1916";
+ UUID documentId = UUID.randomUUID();
+ Document stored = Document.builder()
+ .id(documentId)
+ .metaDatePrecision(DatePrecision.MONTH)
+ .metaDateEnd(storedEnd)
+ .metaDateRaw(storedRaw)
+ .receivers(new HashSet<>())
+ .tags(new HashSet<>())
+ .build();
+ when(documentRepository.findById(documentId)).thenReturn(Optional.of(stored));
+ when(documentRepository.save(any())).thenReturn(stored);
+
+ DocumentUpdateDTO locationOnly = new DocumentUpdateDTO();
+ locationOnly.setLocation("Berlin"); // the only field set; the precision fields are left untouched
+
+ documentService.updateDocument(documentId, locationOnly, null, null);
+
+ // Stored precision, range end and raw text are unchanged after the update.
+ assertThat(stored.getMetaDatePrecision()).isEqualTo(DatePrecision.MONTH);
+ assertThat(stored.getMetaDateEnd()).isEqualTo(storedEnd);
+ assertThat(stored.getMetaDateRaw()).isEqualTo(storedRaw);
+ }
+
// ─── deleteTagCascading ───────────────────────────────────────────────────
@Test
@@ -1362,8 +1411,7 @@ class DocumentServiceTest {
.thenReturn(new PageImpl<>(List.of()));
documentService.searchDocuments(null, null, null, null, null, null, null, null,
- org.raddatz.familienarchiv.document.DocumentSort.DATE, "DESC", null,
- org.springframework.data.domain.PageRequest.of(1, 50));
+ org.raddatz.familienarchiv.document.DocumentSort.DATE, "DESC", null, false, org.springframework.data.domain.PageRequest.of(1, 50));
verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class));
verify(documentRepository, never()).findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Sort.class));
@@ -1376,8 +1424,7 @@ class DocumentServiceTest {
.thenReturn(new PageImpl<>(List.of()));
documentService.searchDocuments(null, null, null, null, null, null, null, null,
- org.raddatz.familienarchiv.document.DocumentSort.DATE, "DESC", null,
- org.springframework.data.domain.PageRequest.of(3, 25));
+ org.raddatz.familienarchiv.document.DocumentSort.DATE, "DESC", null, false, org.springframework.data.domain.PageRequest.of(3, 25));
verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), captor.capture());
assertThat(captor.getValue().getPageNumber()).isEqualTo(3);
@@ -1393,8 +1440,7 @@ class DocumentServiceTest {
.thenReturn(new PageImpl<>(List.of(d), org.springframework.data.domain.PageRequest.of(0, 50), 120L));
DocumentSearchResult result = documentService.searchDocuments(null, null, null, null, null, null, null, null,
- org.raddatz.familienarchiv.document.DocumentSort.DATE, "DESC", null,
- org.springframework.data.domain.PageRequest.of(0, 50));
+ org.raddatz.familienarchiv.document.DocumentSort.DATE, "DESC", null, false, org.springframework.data.domain.PageRequest.of(0, 50));
assertThat(result.totalElements()).isEqualTo(120L);
assertThat(result.pageNumber()).isZero();
@@ -1403,6 +1449,50 @@ class DocumentServiceTest {
assertThat(result.items()).hasSize(1); // only the slice is enriched
}
+ @Test
+ void searchDocuments_dateSort_DESC_ordersUndatedLast() {
+ // Typed captor: getValue() has to yield a Pageable so that getSort() can be called on it.
+ ArgumentCaptor<Pageable> captor = ArgumentCaptor.forClass(Pageable.class);
+ when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class)))
+ .thenReturn(new PageImpl<>(List.of()));
+
+ documentService.searchDocuments(null, null, null, null, null, null, null, null,
+ DocumentSort.DATE, "DESC", null, false, org.springframework.data.domain.PageRequest.of(0, 5));
+
+ verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), captor.capture());
+ Sort sort = captor.getValue().getSort();
+
+ // Primary key: documentDate in the requested direction, with null dates explicitly last.
+ Sort.Order dateOrder = sort.getOrderFor("documentDate");
+ assertThat(dateOrder).isNotNull();
+ assertThat(dateOrder.getDirection()).isEqualTo(Sort.Direction.DESC);
+ assertThat(dateOrder.getNullHandling()).isEqualTo(Sort.NullHandling.NULLS_LAST);
+ // Owner-decided tiebreaker (#668): title ASC, not createdAt.
+ Sort.Order tiebreak = sort.getOrderFor("title");
+ assertThat(tiebreak).isNotNull();
+ assertThat(tiebreak.getDirection()).isEqualTo(Sort.Direction.ASC);
+ assertThat(sort.getOrderFor("createdAt")).isNull();
+ }
+
+ @Test
+ void searchDocuments_dateSort_ASC_ordersUndatedLast() {
+ // ASC must request NULLS LAST explicitly, just like DESC, so that undated documents
+ // never surface at the top because of a database default. (PostgreSQL defaults to
+ // NULLS LAST for ASC and NULLS FIRST for DESC; other engines order nulls differently.)
+ // Typed captor: getValue() has to yield a Pageable so that getSort() can be called on it.
+ ArgumentCaptor<Pageable> captor = ArgumentCaptor.forClass(Pageable.class);
+ when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class)))
+ .thenReturn(new PageImpl<>(List.of()));
+
+ documentService.searchDocuments(null, null, null, null, null, null, null, null,
+ DocumentSort.DATE, "ASC", null, false, org.springframework.data.domain.PageRequest.of(0, 5));
+
+ verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), captor.capture());
+ Sort sort = captor.getValue().getSort();
+
+ // Primary key: documentDate in the requested direction, with null dates explicitly last.
+ Sort.Order dateOrder = sort.getOrderFor("documentDate");
+ assertThat(dateOrder).isNotNull();
+ assertThat(dateOrder.getDirection()).isEqualTo(Sort.Direction.ASC);
+ assertThat(dateOrder.getNullHandling()).isEqualTo(Sort.NullHandling.NULLS_LAST);
+ // Owner-decided tiebreaker (#668): title ASC, not createdAt.
+ Sort.Order tiebreak = sort.getOrderFor("title");
+ assertThat(tiebreak).isNotNull();
+ assertThat(tiebreak.getDirection()).isEqualTo(Sort.Direction.ASC);
+ assertThat(sort.getOrderFor("createdAt")).isNull();
+ }
+
@Test
void searchDocuments_UPDATED_AT_sort_resolves_to_updatedAt_field() {
ArgumentCaptor captor = ArgumentCaptor.forClass(Pageable.class);
@@ -1410,8 +1500,7 @@ class DocumentServiceTest {
.thenReturn(new PageImpl<>(List.of()));
documentService.searchDocuments(null, null, null, null, null, null, null, null,
- DocumentSort.UPDATED_AT, "DESC", null,
- org.springframework.data.domain.PageRequest.of(0, 5));
+ DocumentSort.UPDATED_AT, "DESC", null, false, org.springframework.data.domain.PageRequest.of(0, 5));
verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), captor.capture());
assertThat(captor.getValue().getSort())
@@ -1435,8 +1524,7 @@ class DocumentServiceTest {
.thenReturn(all);
DocumentSearchResult result = documentService.searchDocuments(null, null, null, null, null, null, null, null,
- org.raddatz.familienarchiv.document.DocumentSort.SENDER, "asc", null,
- org.springframework.data.domain.PageRequest.of(1, 50));
+ org.raddatz.familienarchiv.document.DocumentSort.SENDER, "asc", null, false, org.springframework.data.domain.PageRequest.of(1, 50));
assertThat(result.totalElements()).isEqualTo(120L);
assertThat(result.pageNumber()).isEqualTo(1);
@@ -1460,8 +1548,7 @@ class DocumentServiceTest {
.thenReturn(all);
DocumentSearchResult result = documentService.searchDocuments(null, null, null, null, null, null, null, null,
- org.raddatz.familienarchiv.document.DocumentSort.SENDER, "asc", null,
- org.springframework.data.domain.PageRequest.of(10, 50));
+ org.raddatz.familienarchiv.document.DocumentSort.SENDER, "asc", null, false, org.springframework.data.domain.PageRequest.of(10, 50));
assertThat(result.items()).isEmpty();
assertThat(result.totalElements()).isEqualTo(30L);
@@ -1474,7 +1561,7 @@ class DocumentServiceTest {
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class)))
.thenReturn(new PageImpl<>(List.of()));
- documentService.searchDocuments(null, null, null, null, null, null, null, DocumentStatus.REVIEWED, null, null, null, UNPAGED);
+ documentService.searchDocuments(null, null, null, null, null, null, null, DocumentStatus.REVIEWED, null, null, null, false, UNPAGED);
verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class));
}
@@ -1484,7 +1571,7 @@ class DocumentServiceTest {
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class)))
.thenReturn(new PageImpl<>(List.of()));
- documentService.searchDocuments(null, null, null, null, null, null, null, null, null, null, null, UNPAGED);
+ documentService.searchDocuments(null, null, null, null, null, null, null, null, null, null, null, false, UNPAGED);
verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class));
}
@@ -1562,7 +1649,7 @@ class DocumentServiceTest {
.thenReturn(List.of(withSender, noSender));
DocumentSearchResult result = documentService.searchDocuments(
- null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null, UNPAGED);
+ null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null, false, UNPAGED);
assertThat(result.items()).hasSize(2);
assertThat(result.items()).extracting(DocumentListItem::title).containsExactly("Has Sender", "No Sender");
@@ -1582,12 +1669,117 @@ class DocumentServiceTest {
.thenReturn(List.of(noReceivers, withReceiver));
DocumentSearchResult result = documentService.searchDocuments(
- null, null, null, null, null, null, null, null, DocumentSort.RECEIVER, "asc", null, UNPAGED);
+ null, null, null, null, null, null, null, null, DocumentSort.RECEIVER, "asc", null, false, UNPAGED);
assertThat(result.items()).extracting(DocumentListItem::title)
.containsExactly("Has Receiver", "No Receivers");
}
+ // ─── searchDocuments — undated docs stay in their person group (#668) ───────
+
+ @Test
+ void searchDocuments_senderSort_asc_keepsUndatedInsideSenderGroupNotAtHead() {
+ // Locking test (#668) for the in-memory SENDER sort: documents are ordered by
+ // sender name only, so a letter without a documentDate has to remain next to the
+ // other letters of the same sender instead of moving to the top of the page.
+ // Sort key is "lastName firstName", hence "Adler Bob" sorts before "Ziegler Anna".
+ Person adler = Person.builder().id(UUID.randomUUID()).firstName("Bob").lastName("Adler").build();
+ Person ziegler = Person.builder().id(UUID.randomUUID()).firstName("Anna").lastName("Ziegler").build();
+
+ // One undated and one dated letter per sender.
+ Document bobNoDate = Document.builder().id(UUID.randomUUID()).title("Bob undated")
+ .sender(adler).documentDate(null).build();
+ Document bobWithDate = Document.builder().id(UUID.randomUUID()).title("Bob dated")
+ .sender(adler).documentDate(LocalDate.of(1916, 6, 15)).build();
+ Document annaNoDate = Document.builder().id(UUID.randomUUID()).title("Anna undated")
+ .sender(ziegler).documentDate(null).build();
+ Document annaWithDate = Document.builder().id(UUID.randomUUID()).title("Anna dated")
+ .sender(ziegler).documentDate(LocalDate.of(1943, 12, 24)).build();
+
+ // The repository order mixes senders and dated/undated rows, starting with an
+ // undated one; an ordering that looked at the date would have to pull the two
+ // undated rows together at one end of the list.
+ List<Document> repositoryOrder = List.of(bobNoDate, annaWithDate, bobWithDate, annaNoDate);
+ when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class)))
+ .thenReturn(repositoryOrder);
+
+ DocumentSearchResult result = documentService.searchDocuments(
+ null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null, false, UNPAGED);
+
+ // Expected: Bob's group first, then Anna's. Within a group the repository order
+ // is expected to survive (stable sort): undated-then-dated for Bob,
+ // dated-then-undated for Anna.
+ assertThat(result.items()).extracting(DocumentListItem::title)
+ .containsExactly("Bob undated", "Bob dated", "Anna dated", "Anna undated");
+ }
+
+ @Test
+ void searchDocuments_senderSort_desc_keepsUndatedInsideSenderGroupNotAtHead() {
+ // Mirror of the ascending case for the in-memory path: with DESC the sender
+ // groups swap ("Ziegler Anna" ahead of "Adler Bob"), yet ordering is still by
+ // sender and never by date, so no undated letter leaves its sender's group.
+ Person adler = Person.builder().id(UUID.randomUUID()).firstName("Bob").lastName("Adler").build();
+ Person ziegler = Person.builder().id(UUID.randomUUID()).firstName("Anna").lastName("Ziegler").build();
+
+ // One undated and one dated letter per sender.
+ Document bobNoDate = Document.builder().id(UUID.randomUUID()).title("Bob undated")
+ .sender(adler).documentDate(null).build();
+ Document bobWithDate = Document.builder().id(UUID.randomUUID()).title("Bob dated")
+ .sender(adler).documentDate(LocalDate.of(1916, 6, 15)).build();
+ Document annaNoDate = Document.builder().id(UUID.randomUUID()).title("Anna undated")
+ .sender(ziegler).documentDate(null).build();
+ Document annaWithDate = Document.builder().id(UUID.randomUUID()).title("Anna dated")
+ .sender(ziegler).documentDate(LocalDate.of(1943, 12, 24)).build();
+
+ List<Document> repositoryOrder = List.of(bobNoDate, annaWithDate, bobWithDate, annaNoDate);
+ when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class)))
+ .thenReturn(repositoryOrder);
+
+ DocumentSearchResult result = documentService.searchDocuments(
+ null, null, null, null, null, null, null, null, DocumentSort.SENDER, "desc", null, false, UNPAGED);
+
+ // Expected: Anna's group first, then Bob's; each undated letter sits inside its group.
+ assertThat(result.items()).extracting(DocumentListItem::title)
+ .containsExactly("Anna dated", "Anna undated", "Bob undated", "Bob dated");
+ }
+
+ @Test
+ void searchDocuments_undatedTrue_withSenderSort_appliesUndatedSpecification() {
+ // Reachable UI state: "Nur undatierte" toggled on while grouped by sender.
+ // The SENDER sort takes the in-memory path. This test pins that, with
+ // undated=true, that path still queries the repository through a Specification
+ // and returns what the repository yields. It does NOT evaluate the captured
+ // Specification against data; the undatedOnly predicate itself is exercised
+ // against a repository by the undatedOnly_* tests in DocumentSpecificationsTest.
+ Person alice = Person.builder().id(UUID.randomUUID()).firstName("Alice").lastName("Ziegler").build();
+ Document undatedFromAlice = Document.builder().id(UUID.randomUUID()).title("Undated")
+ .sender(alice).documentDate(null).build();
+
+ when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class)))
+ .thenReturn(List.of(undatedFromAlice));
+
+ DocumentSearchResult result = documentService.searchDocuments(
+ null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null, true, UNPAGED);
+
+ // Capture during verification (not stubbing): a missing findAll(Specification)
+ // call then fails as a verification error instead of an empty captor.
+ // atLeastOnce() keeps the check independent of how often the service queries.
+ org.mockito.ArgumentCaptor<org.springframework.data.jpa.domain.Specification<Document>> specCaptor =
+ org.mockito.ArgumentCaptor.forClass(org.springframework.data.jpa.domain.Specification.class);
+ verify(documentRepository, atLeastOnce()).findAll(specCaptor.capture());
+
+ assertThat(specCaptor.getValue()).isNotNull();
+ assertThat(result.items()).extracting(DocumentListItem::title).containsExactly("Undated");
+ }
+
+ @Test
+ void searchDocuments_undatedTrue_usesSpecificationPath_notPureTextRelevanceShortcut() {
+ // A text-only RELEVANCE search can be served by the raw FTS page query, which
+ // does not go through buildSearchSpec; with undated=true that shortcut would
+ // silently lose the undatedOnly predicate, so it must not be taken.
+ String query = "brief";
+ when(documentRepository.findAllMatchingIdsByFts(query)).thenReturn(List.of(UUID.randomUUID()));
+ when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class)))
+ .thenReturn(List.of());
+
+ documentService.searchDocuments(query, null, null, null, null, null, null, null,
+ DocumentSort.RELEVANCE, null, null, true, UNPAGED);
+
+ // The raw-page SQL shortcut was skipped; the FTS-id lookup feeding the Specification path ran.
+ verify(documentRepository, never()).findFtsPageRaw(anyString(), anyInt(), anyInt());
+ verify(documentRepository).findAllMatchingIdsByFts(query);
+ }
+
@Test
void searchDocuments_senderSort_nullLastNameSortsToEnd() {
// Without fix: null lastName produces sort key "null Smith" which compares
@@ -1604,7 +1796,7 @@ class DocumentServiceTest {
.thenReturn(List.of(docNullName, docSmith));
DocumentSearchResult result = documentService.searchDocuments(
- null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null, UNPAGED);
+ null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null, false, UNPAGED);
// null lastName should sort to end (treated as empty), not before "smith" (as "null")
assertThat(result.items()).extracting(DocumentListItem::title)
@@ -1627,7 +1819,7 @@ class DocumentServiceTest {
when(documentRepository.findEnrichmentData(any(), eq("Brief"))).thenReturn(rows);
DocumentSearchResult result = documentService.searchDocuments(
- "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, UNPAGED);
+ "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, false, UNPAGED);
assertThat(result.items()).hasSize(1);
SearchMatchData md = result.items().get(0).matchData();
@@ -1641,8 +1833,7 @@ class DocumentServiceTest {
.thenReturn(new PageImpl<>(List.of()));
DocumentSearchResult result = documentService.searchDocuments(
- null, null, null, null, null, null, null, null, null, null, null,
- UNPAGED);
+ null, null, null, null, null, null, null, null, null, null, null, false, UNPAGED);
assertThat(result.items()).isEmpty();
}
@@ -1662,7 +1853,7 @@ class DocumentServiceTest {
when(documentRepository.findEnrichmentData(any(), eq("Brief"))).thenReturn(rows);
DocumentSearchResult result = documentService.searchDocuments(
- "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, UNPAGED);
+ "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, false, UNPAGED);
SearchMatchData md = result.items().get(0).matchData();
assertThat(md.transcriptionSnippet()).isEqualTo("Hier ist der Brief aus Berlin");
@@ -2179,7 +2370,7 @@ class DocumentServiceTest {
.thenReturn(List.of(d1, d2));
List result = documentService.findIdsForFilter(
- null, null, null, null, null, null, null, null, null);
+ null, null, null, null, null, null, null, null, null, false);
assertThat(result).containsExactly(d1.getId(), d2.getId());
}
@@ -2194,7 +2385,7 @@ class DocumentServiceTest {
when(tagService.expandTagNamesToDescendantIdSets(any())).thenReturn(List.of());
documentService.findIdsForFilter(
- null, null, null, null, null, List.of("Brief"), null, null, TagOperator.OR);
+ null, null, null, null, null, List.of("Brief"), null, null, TagOperator.OR, false);
// Spec built without throwing → OR branch was exercised. Coverage gain
// is in not-throwing on the OR-specific code path; the actual SQL is
@@ -2207,7 +2398,7 @@ class DocumentServiceTest {
when(documentRepository.findAllMatchingIdsByFts("xyz")).thenReturn(List.of());
List result = documentService.findIdsForFilter(
- "xyz", null, null, null, null, null, null, null, null);
+ "xyz", null, null, null, null, null, null, null, null, false);
assertThat(result).isEmpty();
verify(documentRepository, never()).findAll(any(org.springframework.data.jpa.domain.Specification.class));
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSpecificationsTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSpecificationsTest.java
index 7af1ec22..b9f8a46d 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSpecificationsTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSpecificationsTest.java
@@ -261,4 +261,21 @@ class DocumentSpecificationsTest {
assertThat(result).isEmpty();
}
+ // ─── undatedOnly ──────────────────────────────────────────────────────────
+
+ @Test
+ void undatedOnly_false_returnsAllDocuments() {
+ // false → no predicate (null), so the filter is a no-op (issue #668).
+ List<Document> result = documentRepository.findAll(Specification.where(undatedOnly(false)));
+ // All three fixture documents are expected back, dated or not.
+ assertThat(result).hasSize(3);
+ }
+
+ @Test
+ void undatedOnly_true_returnsOnlyDocumentsWithoutADate() {
+ // Only the placeholder photo has a null documentDate in the fixture.
+ // The list is typed so that getTitle()/getDocumentDate() resolve on its elements.
+ List<Document> result = documentRepository.findAll(Specification.where(undatedOnly(true)));
+ assertThat(result).extracting(Document::getTitle).containsExactly("Familienfoto");
+ assertThat(result).allMatch(d -> d.getDocumentDate() == null);
+ }
+
}
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/UndatedDocumentOrderingIntegrationTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/UndatedDocumentOrderingIntegrationTest.java
new file mode 100644
index 00000000..e1eeddc7
--- /dev/null
+++ b/backend/src/test/java/org/raddatz/familienarchiv/document/UndatedDocumentOrderingIntegrationTest.java
@@ -0,0 +1,149 @@
+package org.raddatz.familienarchiv.document;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.raddatz.familienarchiv.PostgresContainerConfig;
+import org.raddatz.familienarchiv.config.FlywayConfig;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.data.jpa.test.autoconfigure.DataJpaTest;
+import org.springframework.boot.jdbc.test.autoconfigure.AutoConfigureTestDatabase;
+import org.springframework.context.annotation.Import;
+import org.springframework.data.domain.Sort;
+import org.springframework.data.jpa.domain.Specification;
+
+import java.time.LocalDate;
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.raddatz.familienarchiv.document.DocumentSpecifications.isBetween;
+import static org.raddatz.familienarchiv.document.DocumentSpecifications.undatedOnly;
+
+/**
+ * Real-Postgres assertions for issue #668. H2 disagrees with Postgres on
+ * {@code NULLS FIRST/LAST} defaults and on whether {@code BETWEEN} excludes
+ * NULL, so these guarantees MUST run against {@code postgres:16-alpine}, never
+ * an in-memory database.
+ *
+ * <p>Unless a test replaces it, the fixture inserted by {@code setUp()} consists of
+ * two dated rows (1916, 1943) and two undated rows.
+ */
+@DataJpaTest
+@AutoConfigureTestDatabase(replace = AutoConfigureTestDatabase.Replace.NONE)
+@Import({PostgresContainerConfig.class, FlywayConfig.class})
+class UndatedDocumentOrderingIntegrationTest {
+
+ @Autowired DocumentRepository documentRepository;
+
+ /** Clears the table and inserts the four-row fixture: two dated and two undated documents. */
+ @BeforeEach
+ void setUp() {
+ documentRepository.deleteAll();
+ save("1916", LocalDate.of(1916, 6, 15));
+ save("1943", LocalDate.of(1943, 12, 24));
+ save("undated-a", null);
+ save("undated-b", null);
+ }
+
+ /**
+ * Persists a minimal UPLOADED document named {@code title}.
+ *
+ * @param title document title; also used to derive the original filename
+ * @param date document date, or {@code null} for an undated row (stored with
+ * precision UNKNOWN instead of DAY)
+ */
+ private void save(String title, LocalDate date) {
+ documentRepository.save(Document.builder()
+ .title(title)
+ .originalFilename(title + ".pdf")
+ .status(DocumentStatus.UPLOADED)
+ .metaDatePrecision(date == null ? DatePrecision.UNKNOWN : DatePrecision.DAY)
+ .documentDate(date)
+ .build());
+ }
+
+ @Test
+ void dateAscWithNullsLast_returnsDatedFirstUndatedLast() {
+ Sort sort = Sort.by(new Sort.Order(Sort.Direction.ASC, "documentDate").nullsLast());
+
+ List<Document> result = documentRepository.findAll(sort);
+
+ // Dated rows ascending, followed by both undated rows.
+ assertThat(result).hasSize(4);
+ assertThat(result.get(0).getDocumentDate()).isEqualTo(LocalDate.of(1916, 6, 15));
+ assertThat(result.get(1).getDocumentDate()).isEqualTo(LocalDate.of(1943, 12, 24));
+ assertThat(result.get(2).getDocumentDate()).isNull();
+ assertThat(result.get(3).getDocumentDate()).isNull();
+ }
+
+ @Test
+ void sameDate_tiebreaksByTitleAsc_notCreatedAt_forBothDirections() throws Exception {
+ // Owner decision (#668): equal-date rows tie-break by title ASC, NOT
+ // createdAt. Insert two same-date docs so that createdAt order (insertion
+ // order) is the OPPOSITE of title order: the first-saved doc gets the later
+ // title ("zzz-first"), the second-saved doc gets the earlier title
+ // ("aaa-second"). If the tiebreaker were still createdAt-asc the first-saved
+ // row would lead; because it is title-asc the "aaa-second" row must lead —
+ // and it must lead in BOTH ASC and DESC date directions, since the date is
+ // equal so only the title tiebreaker decides.
+ //
+ // The Sort under test is built by the PRODUCTION resolveSort(DATE, dir) (via
+ // reflection — it is private), not hand-rolled here, so this test proves the
+ // real Postgres ordering that production emits, on real same-date rows.
+ documentRepository.deleteAll();
+ LocalDate sameDate = LocalDate.of(1920, 3, 3);
+ save("zzz-first", sameDate); // saved first → earlier createdAt
+ save("aaa-second", sameDate); // saved second → later createdAt
+
+ List<Document> asc = documentRepository.findAll(resolveProductionSort("ASC"));
+ assertThat(asc).extracting(Document::getTitle)
+ .containsExactly("aaa-second", "zzz-first");
+
+ List<Document> desc = documentRepository.findAll(resolveProductionSort("DESC"));
+ assertThat(desc).extracting(Document::getTitle)
+ .containsExactly("aaa-second", "zzz-first");
+ }
+
+ /**
+ * Invokes the production {@code DocumentService.resolveSort(DocumentSort, String)}
+ * for the DATE sort so the integration assertions exercise the real tiebreaker
+ * choice rather than a sort hand-built in the test.
+ *
+ * @param dir sort direction string handed to the production method ("ASC" or "DESC" here)
+ * @return the Sort produced by production code for {@code DocumentSort.DATE}
+ * @throws Exception if the reflective construction or invocation fails
+ */
+ private Sort resolveProductionSort(String dir) throws Exception {
+ // resolveSort is a pure function of its arguments (uses no instance state), so a
+ // bean instance with null collaborators is sufficient to exercise it.
+ // NOTE(review): getDeclaredConstructors() returns constructors in no guaranteed
+ // order, so [0] is only deterministic while DocumentService declares exactly one
+ // constructor; the all-null argument array also assumes no primitive parameters.
+ var ctor = DocumentService.class.getDeclaredConstructors()[0];
+ ctor.setAccessible(true);
+ Object[] args = new Object[ctor.getParameterCount()];
+ DocumentService service = (DocumentService) ctor.newInstance(args);
+ var m = DocumentService.class.getDeclaredMethod("resolveSort", DocumentSort.class, String.class);
+ m.setAccessible(true);
+ return (Sort) m.invoke(service, DocumentSort.DATE, dir);
+ }
+
+ @Test
+ void undatedOnly_returnsExactlyTheNullDatedRows() {
+ List<Document> result = documentRepository.findAll(undatedOnly(true));
+
+ // Exactly the two undated fixture rows.
+ assertThat(result).hasSize(2);
+ assertThat(result).allMatch(d -> d.getDocumentDate() == null);
+ }
+
+ @Test
+ void undatedOnly_false_returnsAllRows() {
+ Specification<Document> spec = Specification.where(undatedOnly(false));
+
+ List<Document> result = documentRepository.findAll(spec);
+
+ // With the flag off nothing is filtered: all four fixture rows come back.
+ assertThat(result).hasSize(4);
+ }
+
+ @Test
+ void dateRange_excludesUndatedRows() {
+ List<Document> result = documentRepository.findAll(isBetween(
+ LocalDate.of(1900, 1, 1), LocalDate.of(2000, 12, 31)));
+
+ // The range covers both dated rows; rows with a null date do not match it.
+ assertThat(result).hasSize(2);
+ assertThat(result).allMatch(d -> d.getDocumentDate() != null);
+ }
+
+ @Test
+ void undatedOnly_combinedWithDateRange_returnsEmpty() {
+ // The collision rule (#668): a from/to range and undated=true are mutually
+ // exclusive — a row cannot both have a null date and fall inside a range.
+ Specification<Document> spec = Specification
+ .where(undatedOnly(true))
+ .and(isBetween(LocalDate.of(1900, 1, 1), LocalDate.of(2000, 12, 31)));
+
+ List<Document> result = documentRepository.findAll(spec);
+
+ assertThat(result).isEmpty();
+ }
+}
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/CanonicalImportIntegrationTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/CanonicalImportIntegrationTest.java
new file mode 100644
index 00000000..090ffe31
--- /dev/null
+++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/CanonicalImportIntegrationTest.java
@@ -0,0 +1,229 @@
+package org.raddatz.familienarchiv.importing;
+
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.raddatz.familienarchiv.PostgresContainerConfig;
+import org.raddatz.familienarchiv.document.Document;
+import org.raddatz.familienarchiv.document.DocumentRepository;
+import org.raddatz.familienarchiv.document.DocumentStatus;
+import org.raddatz.familienarchiv.person.Person;
+import org.raddatz.familienarchiv.person.PersonRepository;
+import org.raddatz.familienarchiv.tag.TagRepository;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.context.annotation.Import;
+import org.springframework.test.context.ActiveProfiles;
+import org.springframework.test.context.bean.override.mockito.MockitoBean;
+import org.springframework.test.util.ReflectionTestUtils;
+import software.amazon.awssdk.services.s3.S3Client;
+
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.Optional;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Real Postgres (Testcontainers) integration test for the canonical importer. The
+ * {@code UNIQUE(source_ref)} constraint and the upsert-on-conflict behaviour only exist
+ * in real Postgres (never H2), so idempotency is verified here. S3 is mocked — the
+ * synthetic document rows carry no on-disk files, so every document is a PLACEHOLDER and
+ * no upload is attempted.
+ */
+@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
+@ActiveProfiles("test")
+@Import(PostgresContainerConfig.class)
+class CanonicalImportIntegrationTest {
+
+ @MockitoBean S3Client s3Client;
+
+ @Autowired CanonicalImportOrchestrator orchestrator;
+ @Autowired PersonRepository personRepository;
+ @Autowired TagRepository tagRepository;
+ @Autowired DocumentRepository documentRepository;
+
+ Path artifactDir;
+
+ @BeforeEach
+ void setUp() throws Exception {
+     // Start from empty tables; documents are cleared before the persons and tags they link to.
+     documentRepository.deleteAll();
+     personRepository.deleteAll();
+     tagRepository.deleteAll();
+
+     // Stage a fresh synthetic artifact set and point the orchestrator at that directory.
+     artifactDir = Files.createTempDirectory("canonical-import-it");
+     writeArtifacts(artifactDir);
+     String stagedLocation = artifactDir.toString();
+     ReflectionTestUtils.setField(orchestrator, "canonicalDir", stagedLocation);
+ }
+
+ /**
+  * The import commits through its own transactions (the orchestrator is not transactional),
+  * so this test cannot rely on {@code @Transactional} rollback for isolation. Delete the
+  * committed rows after each test — otherwise the last test's documents (dated 1888-02) and
+  * persons/tags leak into the shared Testcontainers Postgres and pollute other integration
+  * tests that assume a known seed (e.g. DocumentDensityIntegrationTest,
+  * DocumentSearchPagedIntegrationTest). Mirrors the @AfterEach deleteAll convention used by
+  * DocumentListItemIntegrationTest.
+  *
+  * <p>Also removes the per-test artifact directory created in {@code setUp}, which would
+  * otherwise accumulate in the system temp folder across runs.
+  */
+ @AfterEach
+ void cleanup() throws Exception {
+     documentRepository.deleteAll();
+     personRepository.deleteAll();
+     tagRepository.deleteAll();
+     // Null-tolerant: returns false instead of throwing when setUp never created the directory.
+     org.springframework.util.FileSystemUtils.deleteRecursively(artifactDir);
+ }
+
+ @Test
+ void reimport_isIdempotent_noDuplicatePersonsTagsOrDocuments() {
+     // First run must finish DONE and create rows — equal counts of zero would prove nothing.
+     orchestrator.runImport();
+     long persons = personRepository.count();
+     long tags = tagRepository.count();
+     long documents = documentRepository.count();
+     assertThat(orchestrator.getStatus().state()).isEqualTo(ImportStatus.State.DONE);
+     assertThat(persons).isPositive();
+     assertThat(tags).isPositive();
+     assertThat(documents).isPositive();
+
+     // Second run over the identical artifacts must leave every table size unchanged.
+     orchestrator.runImport();
+
+     assertThat(personRepository.count()).isEqualTo(persons);
+     assertThat(tagRepository.count()).isEqualTo(tags);
+     assertThat(documentRepository.count()).isEqualTo(documents);
+ }
+
+ @Test
+ void reimport_preservesHumanEditedPersonField() {
+     orchestrator.runImport();
+     // Simulate a manual correction of the imported register entry.
+     Person edited = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow();
+     edited.setNotes("Verified by archivist");
+     edited.setFirstName("Walther");
+     personRepository.save(edited);
+
+     orchestrator.runImport();
+
+     // Neither manual change may be overwritten by the canonical values on re-import.
+     Person afterReimport = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow();
+     assertThat(afterReimport.getNotes()).isEqualTo("Verified by archivist");
+     assertThat(afterReimport.getFirstName()).isEqualTo("Walther");
+ }
+
+ @Test
+ void import_linksDocumentSenderToRegisterPerson_andRetainsRawText() {
+     orchestrator.runImport();
+
+     Person registerPerson = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow();
+     Document letter = documentRepository.findByOriginalFilename("W-0001").orElseThrow();
+
+     // The sender is linked to the register entity and the raw sheet text is kept alongside it.
+     assertThat(letter.getSender()).isNotNull();
+     assertThat(letter.getSender().getId()).isEqualTo(registerPerson.getId());
+     assertThat(letter.getSenderText()).isEqualTo("Walter de Gruyter");
+     // The synthetic rows have no file on disk, so the document stays a placeholder.
+     assertThat(letter.getStatus()).isEqualTo(DocumentStatus.PLACEHOLDER);
+ }
+
+ @Test
+ void import_provisionalFlag_trueForImporterCreated_falseForRegister() {
+     orchestrator.runImport();
+
+     // NOTE(review): only the "falseForRegister" half of the name is asserted. The fixture
+     // contains no person outside the register, so the importer-created (provisional=true)
+     // case is not exercised by this test.
+     Person registerPerson = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow();
+     assertThat(registerPerson.isProvisional()).isFalse();
+ }
+
+ @Test
+ void reimport_prunesRemovedReceiverAndTag_whenCanonicalRowShrinks() throws Exception {
+     orchestrator.runImport();
+     // findById uses the Document.full entity graph so receivers/tags initialise eagerly.
+     var documentId = documentRepository.findByOriginalFilename("W-0001").orElseThrow().getId();
+     Document before = documentRepository.findById(documentId).orElseThrow();
+     assertThat(before.getReceivers()).isNotEmpty();
+     assertThat(before.getTags()).isNotEmpty();
+
+     // Re-stage the document sheet: W-0001 loses its receiver and tag, W-0002 is unchanged.
+     writeSheet(artifactDir.resolve("canonical-documents.xlsx"),
+             List.of("index", "sender_person_id", "sender_name", "receiver_person_ids",
+                     "receiver_names", "date_iso", "date_raw", "date_precision", "date_end",
+                     "location", "tags", "summary"),
+             List.of(
+                     List.of("W-0001", "de-gruyter-walter", "Walter de Gruyter",
+                             "", "", "1888-02-15", "15.2.1888", "DAY", "",
+                             "Rotterdam", "", "Geschäftsreise"),
+                     List.of("W-0002", "de-gruyter-eugenie", "Eugenie de Gruyter",
+                             "de-gruyter-walter", "Walter de Gruyter", "1888-02-16", "16.2.1888", "DAY", "",
+                             "Middelburg", "Themen/Brautbriefe", "Reisepläne")));
+
+     orchestrator.runImport();
+
+     // The associations removed from the sheet must be gone from the persisted document.
+     Document after = documentRepository.findById(documentId).orElseThrow();
+     assertThat(after.getReceivers()).isEmpty();
+     assertThat(after.getTags()).isEmpty();
+ }
+
+ @Test
+ void import_neverFlipsRegisterPersonToProvisional_whenReferencedByDocumentRow() {
+     // Both fixture persons are register persons (provisional=false) and are also referenced
+     // by document rows. The orchestrator loads the register before documents, so the document
+     // loader's register-first match links the existing person and never mints a provisional
+     // one. A second run over the same references must not flip the flag to true either.
+     orchestrator.runImport();
+     orchestrator.runImport();
+
+     for (String sourceRef : List.of("de-gruyter-walter", "de-gruyter-eugenie")) {
+         Person registerPerson = personRepository.findBySourceRef(sourceRef).orElseThrow();
+         assertThat(registerPerson.isProvisional()).isFalse();
+     }
+ }
+
+ // ─── synthetic-but-real artifact set ─────────────────────────────────────────────
+
+ /**
+ * Writes the four canonical artifacts into {@code dir}: the tag-tree sheet, the person
+ * register sheet, the persons-tree JSON and the documents sheet. The data is a minimal,
+ * self-consistent set: two persons, one parent/child tag pair, and two documents that
+ * the two persons send to each other.
+ */
+ private void writeArtifacts(Path dir) throws Exception {
+ // Tag tree: root "Themen" and its child "Themen/Brautbriefe".
+ writeSheet(dir.resolve("canonical-tag-tree.xlsx"),
+ List.of("tag_path", "parent_name", "tag_name"),
+ List.of(
+ List.of("Themen", "", "Themen"),
+ List.of("Themen/Brautbriefe", "Themen", "Brautbriefe")));
+
+ // Person register: both rows carry provisional = "False".
+ writeSheet(dir.resolve("canonical-persons.xlsx"),
+ List.of("person_id", "last_name", "first_name", "maiden_name", "notes", "birth_date", "death_date", "provisional"),
+ List.of(
+ List.of("de-gruyter-walter", "de Gruyter", "Walter", "", "", "1865-01-01", "", "False"),
+ List.of("de-gruyter-eugenie", "de Gruyter", "Eugenie", "Wöhler", "", "", "", "False")));
+
+ // Persons tree: the same two persons (by personId) joined by one SPOUSE_OF relationship.
+ Files.writeString(dir.resolve("canonical-persons-tree.json"), """
+ {"persons":[
+ {"rowId":"row_1","firstName":"Walter","lastName":"de Gruyter","familyMember":true,"personId":"de-gruyter-walter"},
+ {"rowId":"row_2","firstName":"Eugenie","lastName":"de Gruyter","maidenName":"Wöhler","familyMember":true,"personId":"de-gruyter-eugenie"}
+ ],"relationships":[
+ {"personId":"row_1","relatedPersonId":"row_2","type":"SPOUSE_OF","source":"verheiratet_mit"}
+ ]}
+ """);
+
+ // Documents: W-0001 (Walter to Eugenie) and W-0002 (Eugenie to Walter), both tagged
+ // Themen/Brautbriefe and dated to the day.
+ writeSheet(dir.resolve("canonical-documents.xlsx"),
+ List.of("index", "sender_person_id", "sender_name", "receiver_person_ids",
+ "receiver_names", "date_iso", "date_raw", "date_precision", "date_end", "location", "tags", "summary"),
+ List.of(
+ List.of("W-0001", "de-gruyter-walter", "Walter de Gruyter",
+ "de-gruyter-eugenie", "Eugenie de Gruyter", "1888-02-15", "15.2.1888", "DAY", "",
+ "Rotterdam", "Themen/Brautbriefe", "Geschäftsreise"),
+ List.of("W-0002", "de-gruyter-eugenie", "Eugenie de Gruyter",
+ "de-gruyter-walter", "Walter de Gruyter", "1888-02-16", "16.2.1888", "DAY", "",
+ "Middelburg", "Themen/Brautbriefe", "Reisepläne")));
+ }
+
+ /**
+  * Writes a one-sheet XLSX workbook to {@code file}: {@code headers} become row 0 and each
+  * entry of {@code rows} becomes one following row of string cells. An existing file at
+  * that path is overwritten.
+  *
+  * @param file    target path of the workbook
+  * @param headers column names for the header row
+  * @param rows    data rows; a row may be shorter than the header
+  * @throws Exception if the workbook cannot be written
+  */
+ private void writeSheet(Path file, List<String> headers, List<List<String>> rows) throws Exception {
+     try (XSSFWorkbook wb = new XSSFWorkbook()) {
+         Sheet sheet = wb.createSheet("Sheet1");
+         Row headerRow = sheet.createRow(0);
+         for (int col = 0; col < headers.size(); col++) {
+             headerRow.createCell(col).setCellValue(headers.get(col));
+         }
+         // Data rows start at sheet row 1, directly below the header.
+         int rowIndex = 1;
+         for (List<String> values : rows) {
+             Row dataRow = sheet.createRow(rowIndex++);
+             for (int col = 0; col < values.size(); col++) {
+                 dataRow.createCell(col).setCellValue(values.get(col));
+             }
+         }
+         try (OutputStream out = Files.newOutputStream(file)) {
+             wb.write(out);
+         }
+     }
+ }
+}
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/CanonicalImportOrchestratorTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/CanonicalImportOrchestratorTest.java
new file mode 100644
index 00000000..dc12d070
--- /dev/null
+++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/CanonicalImportOrchestratorTest.java
@@ -0,0 +1,130 @@
+package org.raddatz.familienarchiv.importing;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.api.io.TempDir;
+import org.mockito.InOrder;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.raddatz.familienarchiv.exception.DomainException;
+import org.springframework.test.util.ReflectionTestUtils;
+
+import java.io.File;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.inOrder;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+@ExtendWith(MockitoExtension.class)
+class CanonicalImportOrchestratorTest {
+
+ @Mock TagTreeImporter tagTreeImporter;
+ @Mock PersonRegisterImporter personRegisterImporter;
+ @Mock PersonTreeImporter personTreeImporter;
+ @Mock DocumentImporter documentImporter;
+
+ /** Builds an orchestrator over the mocked loaders whose {@code canonicalDir} points at {@code dir}. */
+ private CanonicalImportOrchestrator orchestrator(Path dir) {
+     CanonicalImportOrchestrator underTest = new CanonicalImportOrchestrator(
+             tagTreeImporter, personRegisterImporter, personTreeImporter, documentImporter);
+     // The field is set reflectively because the test constructs the object by hand.
+     String canonicalDir = dir.toString();
+     ReflectionTestUtils.setField(underTest, "canonicalDir", canonicalDir);
+     return underTest;
+ }
+
+ /** Creates all four artifact files in {@code dir}; the dummy content "x" is never parsed because the loaders are mocks. */
+ private void writeAllArtifacts(Path dir) throws Exception {
+     List<String> artifactNames = List.of(
+             "canonical-tag-tree.xlsx",
+             "canonical-persons.xlsx",
+             "canonical-persons-tree.json",
+             "canonical-documents.xlsx");
+     for (String artifactName : artifactNames) {
+         Files.writeString(dir.resolve(artifactName), "x");
+     }
+ }
+
+ @Test
+ void getStatus_isIdleByDefault(@TempDir Path dir) {
+     // A freshly built orchestrator that has never run reports IDLE.
+     CanonicalImportOrchestrator fresh = orchestrator(dir);
+
+     assertThat(fresh.getStatus().state()).isEqualTo(ImportStatus.State.IDLE);
+ }
+
+ @Test
+ void runImport_loadsTagsAndPersonsBeforeDocuments(@TempDir Path dir) throws Exception {
+     writeAllArtifacts(dir);
+     when(documentImporter.load(any())).thenReturn(new DocumentImporter.LoadResult(0, List.of()));
+
+     orchestrator(dir).runImport();
+
+     // The loaders must be invoked in this order: tags, register persons, person tree, documents.
+     InOrder sequence = inOrder(tagTreeImporter, personRegisterImporter, personTreeImporter, documentImporter);
+     sequence.verify(tagTreeImporter).load(any());
+     sequence.verify(personRegisterImporter).load(any());
+     sequence.verify(personTreeImporter).load(any());
+     sequence.verify(documentImporter).load(any());
+ }
+
+ @Test
+ void runImport_setsStatusDone_onSuccess(@TempDir Path dir) throws Exception {
+     writeAllArtifacts(dir);
+     var threeDocuments = new DocumentImporter.LoadResult(3, List.of());
+     when(documentImporter.load(any())).thenReturn(threeDocuments);
+     CanonicalImportOrchestrator underTest = orchestrator(dir);
+
+     underTest.runImport();
+
+     // The processed count in the status is the one reported by the document loader.
+     ImportStatus status = underTest.getStatus();
+     assertThat(status.state()).isEqualTo(ImportStatus.State.DONE);
+     assertThat(status.processed()).isEqualTo(3);
+ }
+
+ @Test
+ void runImport_failsClosed_whenAnArtifactIsMissing(@TempDir Path dir) throws Exception {
+     // Only the tag tree is staged; the other three artifacts are absent.
+     Files.writeString(dir.resolve("canonical-tag-tree.xlsx"), "x");
+     CanonicalImportOrchestrator underTest = orchestrator(dir);
+
+     underTest.runImport();
+
+     // Fail closed: not even the loader whose artifact is present may run.
+     assertThat(underTest.getStatus().state()).isEqualTo(ImportStatus.State.FAILED);
+     verify(tagTreeImporter, never()).load(any());
+     verify(documentImporter, never()).load(any());
+ }
+
+ @Test
+ void runImport_setsStatusFailed_whenLoaderThrows(@TempDir Path dir) throws Exception {
+     writeAllArtifacts(dir);
+     DomainException invalidArtifact = DomainException.badRequest(
+             org.raddatz.familienarchiv.exception.ErrorCode.IMPORT_ARTIFACT_INVALID, "bad");
+     when(tagTreeImporter.load(any())).thenThrow(invalidArtifact);
+     CanonicalImportOrchestrator underTest = orchestrator(dir);
+
+     underTest.runImport();
+
+     // The failure of the first loader ends the run as FAILED before the documents are loaded.
+     assertThat(underTest.getStatus().state()).isEqualTo(ImportStatus.State.FAILED);
+     verify(documentImporter, never()).load(any());
+ }
+
+ @Test
+ void runImportAsync_throwsConflict_whenAlreadyRunning(@TempDir Path dir) {
+     // Force the RUNNING state directly instead of starting a real import.
+     CanonicalImportOrchestrator busy = orchestrator(dir);
+     ImportStatus running = new ImportStatus(
+             ImportStatus.State.RUNNING, "IMPORT_RUNNING", "running", 0, List.of(), null);
+     ReflectionTestUtils.setField(busy, "currentStatus", running);
+
+     assertThatThrownBy(busy::runImportAsync)
+             .isInstanceOf(DomainException.class)
+             .hasMessageContaining("already in progress");
+ }
+
+ @Test
+ void runImport_aggregatesDocumentSkips(@TempDir Path dir) throws Exception {
+     writeAllArtifacts(dir);
+     var skippedPdf = new ImportStatus.SkippedFile("fake.pdf", ImportStatus.SkipReason.INVALID_PDF_SIGNATURE);
+     when(documentImporter.load(any())).thenReturn(new DocumentImporter.LoadResult(1, List.of(skippedPdf)));
+     CanonicalImportOrchestrator underTest = orchestrator(dir);
+
+     underTest.runImport();
+
+     // The skip reported by the document loader shows up in both the counter and the file list.
+     assertThat(underTest.getStatus().skipped()).isEqualTo(1);
+     assertThat(underTest.getStatus().skippedFiles())
+             .extracting(ImportStatus.SkippedFile::filename)
+             .containsExactly("fake.pdf");
+ }
+}
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/CanonicalSheetReaderTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/CanonicalSheetReaderTest.java
new file mode 100644
index 00000000..ee1d3650
--- /dev/null
+++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/CanonicalSheetReaderTest.java
@@ -0,0 +1,115 @@
+package org.raddatz.familienarchiv.importing;
+
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.raddatz.familienarchiv.exception.DomainException;
+
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+class CanonicalSheetReaderTest {
+
+ @Test
+ void readRows_mapsCellsByHeaderName(@TempDir Path tempDir) throws Exception {
+     Path xlsx = write(tempDir, List.of("index", "file"), List.of(List.of("W-0001", "scan.pdf")));
+
+     // Inferred type keeps the row type, so get(..) below resolves on the row, not on Object.
+     var rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file"));
+
+     assertThat(rows).hasSize(1);
+     var onlyRow = rows.get(0);
+     assertThat(onlyRow.get("index")).isEqualTo("W-0001");
+     assertThat(onlyRow.get("file")).isEqualTo("scan.pdf");
+ }
+
+ @Test
+ void readRows_throwsBadRequest_whenRequiredHeaderMissing(@TempDir Path tempDir) throws Exception {
+     // The sheet has only "index", but "file" is required as well.
+     Path xlsx = write(tempDir, List.of("index"), List.of(List.of("W-0001")));
+     List<String> requiredHeaders = List.of("index", "file");
+
+     // The error message must name the missing column.
+     assertThatThrownBy(() -> CanonicalSheetReader.readRows(xlsx.toFile(), requiredHeaders))
+             .isInstanceOf(DomainException.class)
+             .hasMessageContaining("file");
+ }
+
+ @Test
+ void get_returnsEmptyString_forBlankCell(@TempDir Path tempDir) throws Exception {
+     // The "file" cell exists but holds the empty string.
+     Path xlsx = write(tempDir, List.of("index", "file"), List.of(List.of("W-0001", "")));
+
+     var rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file"));
+
+     assertThat(rows.get(0).get("file")).isEmpty();
+ }
+
+ @Test
+ void get_returnsEmptyString_forUnknownColumn(@TempDir Path tempDir) throws Exception {
+     Path xlsx = write(tempDir, List.of("index"), List.of(List.of("W-0001")));
+
+     var rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index"));
+
+     // Asking for a column the sheet never had yields "" rather than an exception.
+     assertThat(rows.get(0).get("does_not_exist")).isEmpty();
+ }
+
+ @Test
+ void get_returnsEmptyString_forTrailingColumns_whenRowShorterThanHeader(@TempDir Path tempDir) throws Exception {
+     // POI omits trailing empty cells, so a real-world artifact row can be narrower than
+     // the header. The missing columns must read as "" rather than throwing.
+     List<String> headers = List.of("index", "file", "summary");
+     Path xlsx = write(tempDir, headers, List.of(List.of("W-0001")));
+
+     var rows = CanonicalSheetReader.readRows(xlsx.toFile(), headers);
+
+     var shortRow = rows.get(0);
+     assertThat(shortRow.get("index")).isEqualTo("W-0001");
+     assertThat(shortRow.get("file")).isEmpty();
+     assertThat(shortRow.get("summary")).isEmpty();
+ }
+
+ @Test
+ void splitList_splitsOnPipe() {
+     // '|' is the list separator; element order is preserved.
+     var parts = CanonicalSheetReader.splitList("a|b|c");
+
+     assertThat(parts).containsExactly("a", "b", "c");
+ }
+
+ @Test
+ void splitList_returnsEmptyList_forBlank() {
+     // Both the empty string and a whitespace-only string yield no elements.
+     for (String blank : List.of("", " ")) {
+         assertThat(CanonicalSheetReader.splitList(blank)).isEmpty();
+     }
+ }
+
+ @Test
+ void splitList_returnsSingleElement_whenNoPipe() {
+     // Without a separator the whole value is the single element.
+     var parts = CanonicalSheetReader.splitList("solo");
+
+     assertThat(parts).containsExactly("solo");
+ }
+
+ @Test
+ void splitList_trimsAndDropsEmptySegments() {
+     // The whitespace-only middle segment disappears instead of becoming an element.
+     var parts = CanonicalSheetReader.splitList("a| |b");
+
+     assertThat(parts).containsExactly("a", "b");
+ }
+
+ /**
+  * Writes {@code sheet.xlsx} into {@code dir}: {@code headers} become row 0 and each entry
+  * of {@code dataRows} becomes one following row of string cells.
+  *
+  * @param dir      directory that receives the workbook
+  * @param headers  column names for the header row
+  * @param dataRows data rows; a row may be shorter than the header
+  * @return the path of the written workbook
+  * @throws Exception if the workbook cannot be written
+  */
+ private Path write(Path dir, List<String> headers, List<List<String>> dataRows) throws Exception {
+     Path xlsx = dir.resolve("sheet.xlsx");
+     try (XSSFWorkbook wb = new XSSFWorkbook()) {
+         Sheet sheet = wb.createSheet("Sheet1");
+         Row headerRow = sheet.createRow(0);
+         for (int col = 0; col < headers.size(); col++) {
+             headerRow.createCell(col).setCellValue(headers.get(col));
+         }
+         // Data rows start at sheet row 1, directly below the header.
+         int rowIndex = 1;
+         for (List<String> values : dataRows) {
+             Row dataRow = sheet.createRow(rowIndex++);
+             for (int col = 0; col < values.size(); col++) {
+                 dataRow.createCell(col).setCellValue(values.get(col));
+             }
+         }
+         try (OutputStream out = Files.newOutputStream(xlsx)) {
+             wb.write(out);
+         }
+     }
+     return xlsx;
+ }
+}
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentImporterTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentImporterTest.java
new file mode 100644
index 00000000..c97de87b
--- /dev/null
+++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentImporterTest.java
@@ -0,0 +1,656 @@
+package org.raddatz.familienarchiv.importing;
+
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.api.io.TempDir;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.raddatz.familienarchiv.document.Document;
+import org.raddatz.familienarchiv.document.DocumentService;
+import org.raddatz.familienarchiv.document.DocumentStatus;
+import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
+import org.raddatz.familienarchiv.person.Person;
+import org.raddatz.familienarchiv.person.PersonService;
+import org.raddatz.familienarchiv.person.PersonUpsertCommand;
+import org.raddatz.familienarchiv.tag.Tag;
+import org.raddatz.familienarchiv.tag.TagService;
+import org.springframework.test.util.ReflectionTestUtils;
+import software.amazon.awssdk.core.sync.RequestBody;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.PutObjectRequest;
+
+import java.io.File;
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.LocalDate;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.UUID;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.lenient;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+@ExtendWith(MockitoExtension.class)
+class DocumentImporterTest {
+
+ @Mock DocumentService documentService;
+ @Mock PersonService personService;
+ @Mock TagService tagService;
+ @Mock S3Client s3Client;
+ @Mock ThumbnailAsyncRunner thumbnailAsyncRunner;
+ @Mock FileStreamOpener fileStreamOpener;
+
+ DocumentImporter importer;
+
+ @BeforeEach
+ void setUp() throws java.io.IOException {
+ // Default opener delegates to FileInputStream — tests that need to force an IOException
+ // override this stub locally (load_skipsFile_whenMagicByteCheckThrowsIoException).
+ lenient().when(fileStreamOpener.open(any(File.class)))
+ .thenAnswer(inv -> new java.io.FileInputStream(inv.getArgument(0, File.class)));
+ importer = new DocumentImporter(documentService, personService, tagService, s3Client,
+ thumbnailAsyncRunner, fileStreamOpener);
+ ReflectionTestUtils.setField(importer, "bucketName", "test-bucket");
+ }
+
+ // ─── index validation — a malicious/garbage index can never reach disk I/O ─────────
+
+ @Test
+ void isValidImportIndex_returnsFalse_whenNull() {
+     // A missing index must be rejected.
+     var accepted = validIndex(null);
+     assertThat(accepted).isFalse();
+ }
+
+ @Test
+ void isValidImportIndex_returnsFalse_whenBlank() {
+     // Whitespace-only input is not an index.
+     var accepted = validIndex(" ");
+     assertThat(accepted).isFalse();
+ }
+
+ @Test
+ void isValidImportIndex_returnsFalse_whenForwardSlash() {
+     // A forward slash would turn the index into a relative path.
+     var accepted = validIndex("etc/passwd");
+     assertThat(accepted).isFalse();
+ }
+
+ @Test
+ void isValidImportIndex_returnsFalse_whenBackslash() {
+     // Backslash separators are refused as well.
+     var accepted = validIndex("..\\etc\\passwd");
+     assertThat(accepted).isFalse();
+ }
+
+ @Test
+ void isValidImportIndex_returnsFalse_whenDotDot() {
+     // A ".." sequence embedded in an otherwise index-like string is still refused.
+     var accepted = validIndex("W-..0001");
+     assertThat(accepted).isFalse();
+ }
+
+ @Test
+ void isValidImportIndex_returnsFalse_whenIsDotDot() {
+     // The bare parent-directory token.
+     var accepted = validIndex("..");
+     assertThat(accepted).isFalse();
+ }
+
+ @Test
+ void isValidImportIndex_returnsFalse_whenSingleDot() {
+     // The bare current-directory token.
+     var accepted = validIndex(".");
+     assertThat(accepted).isFalse();
+ }
+
+ @Test
+ void isValidImportIndex_returnsFalse_whenAbsolutePath() {
+     // A leading slash marks an absolute path.
+     var accepted = validIndex("/etc/passwd");
+     assertThat(accepted).isFalse();
+ }
+
+ @Test
+ void isValidImportIndex_returnsFalse_whenNullByte() {
+     // An embedded NUL character must be refused.
+     var accepted = validIndex("W-0001\0");
+     assertThat(accepted).isFalse();
+ }
+
+ @Test
+ void isValidImportIndex_returnsFalse_whenUnicodeDivisionSlash() {
+     // U+2215 DIVISION SLASH looks like '/' and must not pass as an index character.
+     var accepted = validIndex("W∕0001");
+     assertThat(accepted).isFalse();
+ }
+
+ @Test
+ void isValidImportIndex_returnsFalse_whenFullwidthSlash() {
+     // U+FF0F FULLWIDTH SOLIDUS, written as an escape so that editors or pipelines that
+     // normalise Unicode cannot fold it into the ASCII '/' already covered by
+     // isValidImportIndex_returnsFalse_whenForwardSlash.
+     assertThat(validIndex("W\uFF0F0001")).isFalse();
+ }
+
+ @Test
+ void isValidImportIndex_returnsFalse_whenReverseSolidusOperator() {
+     // U+29F5 REVERSE SOLIDUS OPERATOR looks like a backslash and must not pass either.
+     var accepted = validIndex("W⧵0001");
+     assertThat(accepted).isFalse();
+ }
+
+ @Test
+ void isValidImportIndex_returnsFalse_whenContainsDotPdfExtension() {
+     // The index is the bare catalog id; the importer appends ".pdf" itself. Allowing a dot
+     // would let "W-0001.pdf" turn into "W-0001.pdf.pdf" or smuggle in another extension.
+     var accepted = validIndex("W-0001.pdf");
+     assertThat(accepted).isFalse();
+ }
+
+ // ─── catalog-shape rejects — pass the char pre-checks but must fail INDEX_PATTERN ────
+ // These pin the regex branch itself: each string contains no separator, dot, slash
+ // homoglyph, null byte, or absolute marker, so it sails past every char guard and is
+ // rejected *only* because INDEX_PATTERN.matches() returns false. A weaker pattern would
+ // let them through — these tests would then go red.
+
+ @Test
+ void isValidImportIndex_returnsFalse_whenSpaceInIndex() {
+     // The real-world reject: "J 0070" is a space typo with no PDF on disk.
+     var accepted = validIndex("J 0070");
+     assertThat(accepted).isFalse();
+ }
+
+ @Test
+ void isValidImportIndex_returnsFalse_whenFiveLetterPrefix() {
+     // A catalog prefix has at most four letters, so a five-letter prefix must not match.
+     var accepted = validIndex("WXYZA-0001");
+     assertThat(accepted).isFalse();
+ }
+
+ @Test
+ void isValidImportIndex_returnsFalse_whenNoLetterPrefix() {
+     // An id that starts with digits instead of a letter prefix is not a catalog shape.
+     var accepted = validIndex("12-0001");
+     assertThat(accepted).isFalse();
+ }
+
+ @Test
+ void isValidImportIndex_returnsFalse_whenUppercaseXSuffix() {
+     // Only a lowercase trailing "x" is allowed; the uppercase form must fail.
+     var accepted = validIndex("W-0001X");
+     assertThat(accepted).isFalse();
+ }
+
+ @Test
+ void isValidImportIndex_returnsTrue_whenPlainCatalogIndex() {
+     // The ordinary shape: one-letter prefix, hyphen, four digits.
+     var accepted = validIndex("W-0124");
+     assertThat(accepted).isTrue();
+ }
+
+ @Test
+ void isValidImportIndex_returnsTrue_whenTwoLetterPrefix() {
+     // A two-letter prefix is a valid catalog shape.
+     var accepted = validIndex("Al-0001");
+     assertThat(accepted).isTrue();
+ }
+
+ @Test
+ void isValidImportIndex_returnsTrue_whenThreeLetterPrefix() {
+     // A three-letter, mixed-case prefix is a valid catalog shape.
+     var accepted = validIndex("CuH-0010");
+     assertThat(accepted).isTrue();
+ }
+
+ @Test
+ void isValidImportIndex_returnsTrue_whenUmlautPrefix() {
+     // Real corpus indices carry a German umlaut, e.g. "Mü-0001.pdf" exists on disk.
+     var accepted = validIndex("Mü-0001");
+     assertThat(accepted).isTrue();
+ }
+
+ @Test
+ void isValidImportIndex_returnsTrue_whenDoubleHyphen() {
+     // Real corpus: "C--0029" appears in the spreadsheet. It is a data-entry artefact, but a
+     // legitimate catalog shape that must still resolve rather than crash.
+     var accepted = validIndex("C--0029");
+     assertThat(accepted).isTrue();
+ }
+
+ @Test
+ void isValidImportIndex_returnsTrue_whenXSuffix() {
+     // The normalizer recognises an x-suffix catalog id, so it is allowed defensively.
+     var accepted = validIndex("W-0001x");
+     assertThat(accepted).isTrue();
+ }
+
+ // ─── a valid index resolves to exactly importDir/{index}.pdf within containment ─────
+
+ @Test
+ void load_resolvesPdfByIndex_uploadsToS3_andSetsStatusUploaded(@TempDir Path tempDir) throws Exception {
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+     // The five bytes spell "%PDF-".
+     byte[] pdf = {0x25, 0x50, 0x44, 0x46, 0x2D};
+     Files.write(tempDir.resolve("W-0124.pdf"), pdf);
+     when(documentService.findByOriginalFilename("W-0124")).thenReturn(Optional.empty());
+     when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
+     Path xlsx = writeDocs(tempDir, docRow("W-0124", "", "", "", "", "", "", "", ""));
+
+     importer.load(xlsx.toFile());
+
+     // Exactly one upload happened, and the saved document is UPLOADED with a file path that
+     // ends in the index-derived basename of importDir/W-0124.pdf.
+     verify(s3Client).putObject(any(PutObjectRequest.class), any(RequestBody.class));
+     verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d ->
+             d.getStatus() == DocumentStatus.UPLOADED
+                     && d.getFilePath() != null
+                     && d.getFilePath().endsWith("_W-0124.pdf")));
+ }
+
+ @Test
+ void load_yieldsPlaceholder_whenIndexedPdfMissing(@TempDir Path tempDir) throws Exception {
+     // No X-9999.pdf is written into importDir.
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+     when(documentService.findByOriginalFilename("X-9999")).thenReturn(Optional.empty());
+     when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
+     Path sheet = writeDocs(tempDir, docRow("X-9999", "", "", "", "", "", "", "", ""));
+
+     importer.load(sheet.toFile());
+
+     // The row is still saved, but as a placeholder and without any S3 upload.
+     verify(s3Client, never()).putObject(any(PutObjectRequest.class), any(RequestBody.class));
+     verify(documentService).save(
+             org.mockito.ArgumentMatchers.argThat(d -> d.getStatus() == DocumentStatus.PLACEHOLDER));
+ }
+
+ @Test
+ void load_rejectsMaliciousIndex_neverReadsOutsideImportDir(@TempDir Path tempDir) throws Exception {
+     // An index containing path separators must be skipped outright, never used for disk I/O.
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+     Path sheet = writeDocs(tempDir, docRow("../../etc/cron.d/x", "", "", "", "", "", "", "", ""));
+
+     DocumentImporter.LoadResult outcome = importer.load(sheet.toFile());
+
+     // The row is reported as a traversal skip, and nothing is saved or uploaded.
+     verify(documentService, never()).save(any());
+     verify(s3Client, never()).putObject(any(PutObjectRequest.class), any(RequestBody.class));
+     assertThat(outcome.skippedFiles())
+             .extracting(ImportStatus.SkippedFile::reason)
+             .containsExactly(ImportStatus.SkipReason.INVALID_FILENAME_PATH_TRAVERSAL);
+ }
+
+ @Test
+ void resolvePdfByIndex_throwsWhenResolvedPathEscapesImportDir_viaSymlink(
+         @TempDir Path importDirPath, @TempDir Path outsideDir) throws Exception {
+     // Containment defense-in-depth: a syntactically valid index whose .pdf is a symlink to a
+     // file outside importDir must still be refused, because the resolved canonical path is
+     // required to stay inside importDir.
+     Path secret = Files.writeString(outsideDir.resolve("secret.pdf"), "sensitive");
+     Path linkInsideImportDir = importDirPath.resolve("W-0001.pdf");
+     Files.createSymbolicLink(linkInsideImportDir, secret);
+     ReflectionTestUtils.setField(importer, "importDir", importDirPath.toString());
+
+     org.assertj.core.api.Assertions.assertThatThrownBy(
+                     () -> ReflectionTestUtils.invokeMethod(importer, "resolvePdfByIndex", "W-0001", 2))
+             .isInstanceOf(org.raddatz.familienarchiv.exception.DomainException.class);
+ }
+
+ @Test
+ void resolvePdfByIndex_returnsExactlyImportDirIndexPdf_whenPresent(@TempDir Path tempDir) throws Exception {
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+     Path expected = tempDir.resolve("Eu-0628.pdf");
+     Files.writeString(expected, "%PDF-1.4");
+
+     // Explicit type argument: invokeMethod is generic, and getCanonicalFile() below needs a File.
+     Optional<File> resolved = ReflectionTestUtils.invokeMethod(importer, "resolvePdfByIndex", "Eu-0628", 2);
+
+     // Canonical forms are compared so that differing path spellings of the same file match.
+     assertThat(resolved).isPresent();
+     assertThat(resolved.get().getCanonicalFile()).isEqualTo(expected.toFile().getCanonicalFile());
+ }
+
+ // NOTE (Sara, PR #687): the IOException branch of resolvePdfByIndex — where
+ // File.getCanonicalPath() itself throws (an OS-level failure mid-resolution, not the
+ // symlink-escape DomainException) — is intentionally NOT covered by a test. The magic-byte
+ // check reads through the injected FileStreamOpener, which a test can stub to throw (see
+ // load_skipsFile_whenMagicByteCheckThrowsIoException). getCanonicalPath(), by contrast, is
+ // called on a File built internally with no injection seam, and there is no portable,
+ // deterministic way to make it throw on a temp file (it does not throw for missing or
+ // symlinked paths — those are handled by isFile() and the containment check).
+ // Adding a seam purely to test this would be production code in service of a non-defect; the
+ // substantive fix is the log.warn() now emitted in that branch so the quiet skip surfaces in
+ // ops. Left uncovered by deliberate decision, and documented here so the branch is not
+ // assumed to be tested.
+
+ // ─── PDF magic-byte guard — ported — do not remove ──────────────────────────────
+
+ @Test
+ void load_skipsFile_whenNotPdfMagicBytes(@TempDir Path tempDir) throws Exception {
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+     // The file has the right name but its content does not start with a PDF signature.
+     Files.writeString(tempDir.resolve("W-0001.pdf"), "not a pdf");
+     lenient().when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
+     Path sheet = writeDocs(tempDir, docRow("W-0001", "", "", "", "", "", "", "", ""));
+
+     DocumentImporter.LoadResult outcome = importer.load(sheet.toFile());
+
+     // The row is reported with the signature skip reason and nothing is uploaded.
+     verify(s3Client, never()).putObject(any(PutObjectRequest.class), any(RequestBody.class));
+     assertThat(outcome.skippedFiles())
+             .extracting(ImportStatus.SkippedFile::reason)
+             .containsExactly(ImportStatus.SkipReason.INVALID_PDF_SIGNATURE);
+ }
+
+ @Test
+ void load_skipsFile_whenMagicByteCheckThrowsIoException(@TempDir Path tempDir) throws Exception {
+     // The injected FileStreamOpener is stubbed to fail, so the importer itself needs no spy.
+     when(fileStreamOpener.open(any(File.class))).thenThrow(new java.io.IOException("read error"));
+     lenient().when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
+
+     Files.writeString(tempDir.resolve("W-0001.pdf"), "content");
+     Path sheet = writeDocs(tempDir, docRow("W-0001", "", "", "", "", "", "", "", ""));
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+
+     var skipped = importer.load(sheet.toFile()).skippedFiles();
+
+     // The stubbed read failure is reported as a FILE_READ_ERROR skip.
+     assertThat(skipped)
+             .extracting(ImportStatus.SkippedFile::reason)
+             .containsExactly(ImportStatus.SkipReason.FILE_READ_ERROR);
+ }
+
+ @Test
+ void load_skipsAlreadyExists_whenDocumentUploadedNotPlaceholder(@TempDir Path tempDir) throws Exception {
+     // The index is already taken by a document in UPLOADED (not PLACEHOLDER) state.
+     Document uploaded = Document.builder().status(DocumentStatus.UPLOADED)
+             .originalFilename("W-0001").id(UUID.randomUUID()).build();
+     when(documentService.findByOriginalFilename("W-0001")).thenReturn(Optional.of(uploaded));
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+     Path sheet = writeDocs(tempDir, docRow("W-0001", "", "", "", "", "", "", "", ""));
+
+     var skipped = importer.load(sheet.toFile()).skippedFiles();
+
+     assertThat(skipped).extracting(ImportStatus.SkippedFile::reason)
+             .containsExactly(ImportStatus.SkipReason.ALREADY_EXISTS);
+     verify(documentService, never()).save(any());
+ }
+
+ // ─── presence of importDir/{index}.pdf drives status: present → UPLOADED, absent → PLACEHOLDER ─
+
+ @Test
+ void load_setsStatusPlaceholder_whenNoIndexedPdf(@TempDir Path tempDir) throws Exception {
+     when(documentService.save(any())).thenAnswer(call -> call.getArgument(0));
+     when(documentService.findByOriginalFilename("W-0099")).thenReturn(Optional.empty());
+     Path sheet = writeDocs(tempDir, docRow("W-0099", "", "", "", "", "", "", "", ""));
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+
+     importer.load(sheet.toFile());
+
+     verify(s3Client, never()).putObject(any(PutObjectRequest.class), any(RequestBody.class));
+     verify(documentService).save(org.mockito.ArgumentMatchers.argThat(saved -> saved.getStatus() == DocumentStatus.PLACEHOLDER));
+ }
+
+ // ─── attribution routing — register-first + always retain raw ────────────────────
+
+ @Test
+ void load_linksRegisterSender_andRetainsRawSenderText(@TempDir Path tempDir) throws Exception {
+     Person walter = Person.builder().firstName("Walter").lastName("de Gruyter")
+             .sourceRef("de-gruyter-walter").id(UUID.randomUUID()).build();
+     when(personService.findBySourceRef("de-gruyter-walter")).thenReturn(Optional.of(walter));
+     when(documentService.save(any())).thenAnswer(call -> call.getArgument(0));
+     when(documentService.findByOriginalFilename("W-0001")).thenReturn(Optional.empty());
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+     Path sheet = writeDocs(tempDir,
+             docRow("W-0001", "de-gruyter-walter", "Walter de Gruyter", "", "", "", "", "", ""));
+
+     importer.load(sheet.toFile());
+     // The saved document references the stubbed register Person and keeps the raw name text.
+     verify(documentService).save(org.mockito.ArgumentMatchers.argThat(saved ->
+             "Walter de Gruyter".equals(saved.getSenderText()) && saved.getSender() == walter));
+ }
+
+ @Test
+ void load_createsProvisionalSender_whenSlugUnmatchedInRegister(@TempDir Path tempDir) throws Exception {
+     // The register lookup for the slug is stubbed empty; the upsert command is captured and checked.
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+     Person provisional = Person.builder().id(UUID.randomUUID()).sourceRef("schwester-hanni")
+             .lastName("Schwester Hanni").provisional(true).build();
+     when(documentService.findByOriginalFilename("W-0002")).thenReturn(Optional.empty());
+     when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
+     when(personService.findBySourceRef("schwester-hanni")).thenReturn(Optional.empty());
+     when(personService.upsertBySourceRef(any())).thenReturn(provisional);
+     Path xlsx = writeDocs(tempDir, docRow("W-0002", "schwester-hanni", "Schwester Hanni", "", "", "", "", "", ""));
+
+     importer.load(xlsx.toFile());
+
+     org.mockito.ArgumentCaptor<PersonUpsertCommand> captor =
+             org.mockito.ArgumentCaptor.forClass(PersonUpsertCommand.class);
+     verify(personService).upsertBySourceRef(captor.capture());
+     assertThat(captor.getValue().provisional()).isTrue();
+     assertThat(captor.getValue().lastName()).isEqualTo("Schwester Hanni");
+ }
+
+ @Test
+ void load_createsNoSenderPerson_whenSlugEmptyButRawPresent(@TempDir Path tempDir) throws Exception {
+     when(documentService.save(any())).thenAnswer(call -> call.getArgument(0));
+     when(documentService.findByOriginalFilename("W-0003")).thenReturn(Optional.empty());
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+     // Empty sender slug column, but a raw sender text of "?".
+     Path sheet = writeDocs(tempDir, docRow("W-0003", "", "?", "", "", "", "", "", ""));
+
+     importer.load(sheet.toFile());
+
+     verify(documentService).save(org.mockito.ArgumentMatchers.argThat(saved ->
+             "?".equals(saved.getSenderText()) && saved.getSender() == null));
+     verify(personService, never()).upsertBySourceRef(any());
+     verify(personService, never()).findBySourceRef(any());
+ }
+
+ @Test
+ void load_splitsMultipleReceivers_andRetainsRawReceiverText(@TempDir Path tempDir) throws Exception {
+     Person clara = Person.builder().id(UUID.randomUUID()).sourceRef("clara").lastName("Clara").build();
+     Person herbert = Person.builder().id(UUID.randomUUID()).sourceRef("cram-herbert").lastName("Cram").build();
+     when(personService.findBySourceRef("clara")).thenReturn(Optional.of(clara));
+     when(personService.findBySourceRef("cram-herbert")).thenReturn(Optional.of(herbert));
+     when(documentService.save(any())).thenAnswer(call -> call.getArgument(0));
+     when(documentService.findByOriginalFilename("W-0004")).thenReturn(Optional.empty());
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+     // Two pipe-separated receiver slugs alongside the pipe-separated raw names.
+     Path sheet = writeDocs(tempDir, docRow("W-0004", "", "",
+             "cram-herbert|clara", "Herbert Cram|Clara", "", "", "", ""));
+
+     importer.load(sheet.toFile());
+
+     verify(documentService).save(org.mockito.ArgumentMatchers.argThat(saved ->
+             "Herbert Cram|Clara".equals(saved.getReceiverText())
+                     && saved.getReceivers().size() == 2
+                     && saved.getReceivers().containsAll(List.of(herbert, clara))));
+ }
+
+ @Test
+ void load_provisionalReceiverUsesHumanNameFromReceiverNames_notSlug(@TempDir Path tempDir) throws Exception {
+     // Regression: resolveReceivers used to pass the slug as both `sourceRef` AND `lastName`,
+     // so an unresolved receiver "smith-john" became a provisional Person with
+     // lastName="smith-john". The fix consumes the parallel `receiver_names` column.
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+     Person provisional = Person.builder().id(UUID.randomUUID()).sourceRef("smith-john")
+             .lastName("John Smith").provisional(true).build();
+     when(documentService.findByOriginalFilename("W-0050")).thenReturn(Optional.empty());
+     when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
+     when(personService.findBySourceRef("smith-john")).thenReturn(Optional.empty());
+     when(personService.upsertBySourceRef(any())).thenReturn(provisional);
+     Path xlsx = writeDocs(tempDir, docRow("W-0050", "", "",
+             "smith-john", "John Smith", "", "", "", ""));
+
+     importer.load(xlsx.toFile());
+
+     org.mockito.ArgumentCaptor<PersonUpsertCommand> captor =
+             org.mockito.ArgumentCaptor.forClass(PersonUpsertCommand.class);
+     verify(personService).upsertBySourceRef(captor.capture());
+     assertThat(captor.getValue().sourceRef()).isEqualTo("smith-john");
+     assertThat(captor.getValue().lastName()).isEqualTo("John Smith");
+     assertThat(captor.getValue().provisional()).isTrue();
+ }
+
+ @Test
+ void load_provisionalReceiverFallsBackToSlug_whenNamesListShorterThanSlugs(@TempDir Path tempDir) throws Exception {
+     // Parallel-list zip: if the names list is shorter than the slugs list, slugs without a
+     // matching name fall back to slug as the display name. This is the "missing name" case
+     // (rare in canonical data but the contract must define it).
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+     Person alice = Person.builder().id(UUID.randomUUID()).sourceRef("alice-jones")
+             .lastName("Alice Jones").provisional(true).build();
+     Person bob = Person.builder().id(UUID.randomUUID()).sourceRef("bob-roe")
+             .lastName("bob-roe").provisional(true).build();
+     when(documentService.findByOriginalFilename("W-0051")).thenReturn(Optional.empty());
+     when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
+     when(personService.findBySourceRef("alice-jones")).thenReturn(Optional.empty());
+     when(personService.findBySourceRef("bob-roe")).thenReturn(Optional.empty());
+     when(personService.upsertBySourceRef(any())).thenReturn(alice).thenReturn(bob);
+     Path xlsx = writeDocs(tempDir, docRow("W-0051", "", "",
+             "alice-jones|bob-roe", "Alice Jones", "", "", "", ""));
+
+     importer.load(xlsx.toFile());
+
+     org.mockito.ArgumentCaptor<PersonUpsertCommand> captor =
+             org.mockito.ArgumentCaptor.forClass(PersonUpsertCommand.class);
+     verify(personService, org.mockito.Mockito.times(2)).upsertBySourceRef(captor.capture());
+     assertThat(captor.getAllValues()).extracting(PersonUpsertCommand::sourceRef)
+             .containsExactly("alice-jones", "bob-roe");
+     assertThat(captor.getAllValues()).extracting(PersonUpsertCommand::lastName)
+             .containsExactly("Alice Jones", "bob-roe");
+ }
+
+ // ─── clean date values parse without semantic logic ──────────────────────────────
+
+ @Test
+ void load_parsesCleanDateAndPrecision(@TempDir Path tempDir) throws Exception {
+     when(documentService.save(any())).thenAnswer(call -> call.getArgument(0));
+     when(documentService.findByOriginalFilename("W-0005")).thenReturn(Optional.empty());
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+     // The row supplies the ISO date, the raw date text and the precision as separate columns.
+     Path sheet = writeDocs(tempDir, docRow("W-0005", "", "", "", "", "1916-06-01", "1.6.1916", "MONTH", ""));
+
+     importer.load(sheet.toFile());
+
+     verify(documentService).save(org.mockito.ArgumentMatchers.argThat(saved ->
+             "1.6.1916".equals(saved.getMetaDateRaw())
+                     && saved.getMetaDatePrecision() == org.raddatz.familienarchiv.document.DatePrecision.MONTH
+                     && LocalDate.of(1916, 6, 1).equals(saved.getDocumentDate())));
+ }
+
+ @Test
+ void load_attachesTagBySourceRef(@TempDir Path tempDir) throws Exception {
+     Tag brautbriefe = Tag.builder().sourceRef("Themen/Brautbriefe").name("Brautbriefe").id(UUID.randomUUID()).build();
+     when(tagService.findBySourceRef("Themen/Brautbriefe")).thenReturn(Optional.of(brautbriefe));
+     when(documentService.save(any())).thenAnswer(call -> call.getArgument(0));
+     when(documentService.findByOriginalFilename("W-0006")).thenReturn(Optional.empty());
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+     Path sheet = writeDocs(tempDir, docRowWithTag("W-0006", "Themen/Brautbriefe"));
+
+     importer.load(sheet.toFile());
+     // The tag returned by the sourceRef lookup must be among the saved document's tags.
+     verify(documentService).save(org.mockito.ArgumentMatchers.argThat(saved -> saved.getTags().contains(brautbriefe)));
+ }
+
+ // ─── idempotency — update existing document in place by index ─────────────────────
+
+ @Test
+ void load_updatesExistingDocumentInPlace_whenIndexExists(@TempDir Path tempDir) throws Exception {
+     // A PLACEHOLDER with this index is already stored; the save must carry the same id.
+     Document placeholder = Document.builder().status(DocumentStatus.PLACEHOLDER)
+             .originalFilename("W-0007").id(UUID.randomUUID()).build();
+     when(documentService.save(any())).thenAnswer(call -> call.getArgument(0));
+     when(documentService.findByOriginalFilename("W-0007")).thenReturn(Optional.of(placeholder));
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+     Path sheet = writeDocs(tempDir, docRow("W-0007", "", "", "", "", "", "", "", ""));
+
+     importer.load(sheet.toFile());
+     verify(documentService).save(org.mockito.ArgumentMatchers.argThat(saved -> saved.getId().equals(placeholder.getId())));
+ }
+
+ // ─── canonical collections are authoritative — re-import prunes removed links ──────
+
+ @Test
+ void load_prunesReceiversAndTags_whenCanonicalRowShrinks(@TempDir Path tempDir) throws Exception {
+     // Seed a stored placeholder that still carries one receiver and one tag.
+     Document stored = Document.builder().status(DocumentStatus.PLACEHOLDER)
+             .originalFilename("W-0008").id(UUID.randomUUID()).build();
+     stored.getTags().add(Tag.builder().id(UUID.randomUUID()).name("Stale").sourceRef("Themen/Stale").build());
+     stored.getReceivers().add(
+             Person.builder().id(UUID.randomUUID()).sourceRef("stale-receiver").lastName("Stale").build());
+     when(documentService.save(any())).thenAnswer(call -> call.getArgument(0));
+     when(documentService.findByOriginalFilename("W-0008")).thenReturn(Optional.of(stored));
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+     // The canonical row names neither receiver nor tag, so both stored links must be dropped.
+     Path sheet = writeDocs(tempDir, docRow("W-0008", "", "", "", "", "", "", "", ""));
+
+     importer.load(sheet.toFile());
+
+     verify(documentService).save(org.mockito.ArgumentMatchers.argThat(saved ->
+             saved.getTags().isEmpty() && saved.getReceivers().isEmpty()));
+ }
+
+ // ─── title carries the honest date label — never a precision the data lacks ───────
+
+ @Test
+ void load_buildsTitleWithMonthLabel_whenPrecisionIsMonth(@TempDir Path tempDir) throws Exception {
+     when(documentService.save(any())).thenAnswer(call -> call.getArgument(0));
+     when(documentService.findByOriginalFilename("W-0100")).thenReturn(Optional.empty());
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+     Path sheet = writeDocs(tempDir,
+             docRow("W-0100", "", "", "", "", "1916-06-01", "Juni 1916", "MONTH", ""));
+
+     importer.load(sheet.toFile());
+     // MONTH precision: the title names the month but must not show a day-of-month.
+     verify(documentService).save(org.mockito.ArgumentMatchers.argThat(saved ->
+             !saved.getTitle().contains("1. Juni") && saved.getTitle().contains("Juni 1916")));
+ }
+
+ @Test
+ void load_buildsTitleWithFullDate_whenPrecisionIsDay(@TempDir Path tempDir) throws Exception {
+     when(documentService.save(any())).thenAnswer(call -> call.getArgument(0));
+     when(documentService.findByOriginalFilename("W-0101")).thenReturn(Optional.empty());
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+     Path sheet = writeDocs(tempDir,
+             docRow("W-0101", "", "", "", "", "1943-12-24", "24.12.1943", "DAY", ""));
+
+     importer.load(sheet.toFile());
+     // DAY precision: the title contains the fully spelled-out date.
+     verify(documentService).save(org.mockito.ArgumentMatchers.argThat(
+             saved -> saved.getTitle().contains("24. Dezember 1943")));
+ }
+
+ @Test
+ void load_buildsTitleFromIndexOnly_whenDateUnknown(@TempDir Path tempDir) throws Exception {
+     when(documentService.save(any())).thenAnswer(call -> call.getArgument(0));
+     when(documentService.findByOriginalFilename("W-0102")).thenReturn(Optional.empty());
+     ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
+     Path sheet = writeDocs(tempDir,
+             docRow("W-0102", "", "", "", "", "", "?", "UNKNOWN", ""));
+
+     importer.load(sheet.toFile());
+     // No ISO date and UNKNOWN precision: the title is exactly the index.
+     verify(documentService).save(org.mockito.ArgumentMatchers.argThat(
+             saved -> saved.getTitle().equals("W-0102")));
+ }
+
+ // ─── helpers ─────────────────────────────────────────────────────────────────────
+
+ private Boolean validIndex(String index) {
+     return ReflectionTestUtils.<Boolean>invokeMethod(importer, "isValidImportIndex", index);
+ }
+
+ // Builds one canonical-sheet row keyed by column header; location, tags and summary start as "".
+ private Map<String, String> docRow(String index, String senderId, String senderName, String receiverIds,
+         String receiverNames, String dateIso, String dateRaw, String datePrecision, String dateEnd) {
+     Map<String, String> r = new LinkedHashMap<>();
+     r.put("index", index);
+     r.put("sender_person_id", senderId);
+     r.put("sender_name", senderName);
+     r.put("receiver_person_ids", receiverIds);
+     r.put("receiver_names", receiverNames);
+     r.put("date_iso", dateIso);
+     r.put("date_raw", dateRaw);
+     r.put("date_precision", datePrecision);
+     r.put("date_end", dateEnd);
+     r.put("location", "");
+     r.put("tags", "");
+     r.put("summary", "");
+     return r;
+ }
+
+ private Map<String, String> docRowWithTag(String index, String tagPath) {
+     Map<String, String> r = docRow(index, "", "", "", "", "", "", "", "");
+     r.put("tags", tagPath); // overrides the "tags" value of an otherwise blank row for this index
+     return r;
+ }
+
+ // Writes a header row plus one sheet row per map to dir/canonical-documents.xlsx and returns that path.
+ @SafeVarargs
+ private Path writeDocs(Path dir, Map<String, String>... rows) throws Exception {
+     Path xlsx = dir.resolve("canonical-documents.xlsx");
+     List<String> headers = List.of("index", "sender_person_id", "sender_name", "receiver_person_ids",
+             "receiver_names", "date_iso", "date_raw", "date_precision", "date_end", "location", "tags", "summary");
+     try (XSSFWorkbook wb = new XSSFWorkbook()) {
+         Sheet sheet = wb.createSheet("Sheet1");
+         Row header = sheet.createRow(0);
+         for (int i = 0; i < headers.size(); i++) {
+             header.createCell(i).setCellValue(headers.get(i));
+         }
+         for (int r = 0; r < rows.length; r++) {
+             Row row = sheet.createRow(r + 1);
+             for (int c = 0; c < headers.size(); c++) {
+                 row.createCell(c).setCellValue(rows[r].getOrDefault(headers.get(c), ""));
+             }
+         }
+         try (OutputStream out = Files.newOutputStream(xlsx)) {
+             wb.write(out);
+         }
+     }
+     return xlsx;
+ }
+}
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentTitleFormatterTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentTitleFormatterTest.java
new file mode 100644
index 00000000..d8f66b6e
--- /dev/null
+++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/DocumentTitleFormatterTest.java
@@ -0,0 +1,49 @@
+package org.raddatz.familienarchiv.importing;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.junit.jupiter.api.DynamicTest;
+import org.junit.jupiter.api.TestFactory;
+import org.raddatz.familienarchiv.document.DatePrecision;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.LocalDate;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Asserts the Java title label against the SAME shared fixture table the TS
+ * formatter spec uses ({@code docs/date-label-fixtures.json}). This is the
+ * drift guard requested in #666 review: the two label implementations cannot
+ * silently diverge (en-dash vs hyphen, "ca." vs "circa", season words, range
+ * collapse) because both are pinned to one committed rule set.
+ */
+class DocumentTitleFormatterTest {
+
+    @TestFactory
+    List<DynamicTest> matchesSharedFixtureTable() throws Exception {
+        // Maven runs tests from the backend/ module dir; the fixture lives at repo-root docs/.
+        Path fixture = Path.of("..", "docs", "date-label-fixtures.json");
+        JsonNode root = new ObjectMapper().readTree(Files.readString(fixture));
+        List<DynamicTest> tests = new ArrayList<>();
+        for (JsonNode c : root.get("cases")) {
+            String name = c.get("name").asText();
+            LocalDate anchor = parseDate(c.get("anchor"));
+            DatePrecision precision = DatePrecision.valueOf(c.get("precision").asText());
+            LocalDate end = parseDate(c.get("end"));
+            String raw = c.hasNonNull("raw") ? c.get("raw").asText() : null; // absent or JSON null -> null, like parseDate
+            String expected = c.get("expected").asText();
+            tests.add(DynamicTest.dynamicTest(name, () ->
+                    assertThat(DocumentTitleFormatter.formatTitleDate(anchor, precision, end, raw))
+                            .isEqualTo(expected)));
+        }
+        return tests;
+    }
+
+    private static LocalDate parseDate(JsonNode node) {
+        return node == null || node.isNull() ? null : LocalDate.parse(node.asText());
+    }
+}
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java
deleted file mode 100644
index d87d28c1..00000000
--- a/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java
+++ /dev/null
@@ -1,896 +0,0 @@
-package org.raddatz.familienarchiv.importing;
-
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.extension.ExtendWith;
-import org.junit.jupiter.api.io.TempDir;
-import org.mockito.Mock;
-import org.mockito.junit.jupiter.MockitoExtension;
-import org.raddatz.familienarchiv.exception.DomainException;
-import org.raddatz.familienarchiv.document.Document;
-import org.raddatz.familienarchiv.document.DocumentService;
-import org.raddatz.familienarchiv.document.DocumentStatus;
-import org.raddatz.familienarchiv.document.ThumbnailAsyncRunner;
-import org.raddatz.familienarchiv.person.Person;
-import org.raddatz.familienarchiv.tag.Tag;
-import org.raddatz.familienarchiv.tag.TagService;
-import org.raddatz.familienarchiv.person.PersonService;
-import org.springframework.test.util.ReflectionTestUtils;
-import software.amazon.awssdk.core.sync.RequestBody;
-import software.amazon.awssdk.services.s3.S3Client;
-import software.amazon.awssdk.services.s3.model.PutObjectRequest;
-
-import org.apache.poi.xssf.usermodel.XSSFWorkbook;
-import org.xml.sax.SAXParseException;
-
-import java.io.File;
-import java.io.OutputStream;
-import java.io.ByteArrayOutputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.time.LocalDate;
-import java.time.LocalDateTime;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Optional;
-import java.util.UUID;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipOutputStream;
-
-import static org.assertj.core.api.Assertions.assertThat;
-import static org.assertj.core.api.Assertions.assertThatThrownBy;
-import static org.mockito.ArgumentMatchers.any;
-import static org.mockito.Mockito.*;
-
-@ExtendWith(MockitoExtension.class)
-class MassImportServiceTest {
-
- @Mock DocumentService documentService;
- @Mock PersonService personService;
- @Mock TagService tagService;
- @Mock S3Client s3Client;
- @Mock ThumbnailAsyncRunner thumbnailAsyncRunner;
-
- MassImportService service;
-
- @BeforeEach
- void setUp() {
- service = new MassImportService(documentService, personService, tagService, s3Client, thumbnailAsyncRunner);
- ReflectionTestUtils.setField(service, "bucketName", "test-bucket");
- ReflectionTestUtils.setField(service, "importDir", "/import");
- ReflectionTestUtils.setField(service, "colIndex", 0);
- ReflectionTestUtils.setField(service, "colBox", 1);
- ReflectionTestUtils.setField(service, "colFolder", 2);
- ReflectionTestUtils.setField(service, "colSender", 3);
- ReflectionTestUtils.setField(service, "colReceivers", 5);
- ReflectionTestUtils.setField(service, "colDate", 7);
- ReflectionTestUtils.setField(service, "colLocation", 9);
- ReflectionTestUtils.setField(service, "colTags", 10);
- ReflectionTestUtils.setField(service, "colSummary", 11);
- ReflectionTestUtils.setField(service, "colTranscription", 13);
- }
-
- // ─── getStatus ────────────────────────────────────────────────────────────
-
- @Test
- void getStatus_returnsIdleByDefault() {
- assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.IDLE);
- }
-
- @Test
- void getStatus_hasStatusCode_IMPORT_IDLE_byDefault() {
- assertThat(service.getStatus().statusCode()).isEqualTo("IMPORT_IDLE");
- }
-
- // ─── runImportAsync ───────────────────────────────────────────────────────
-
- @Test
- void runImportAsync_setsFailedStatus_whenImportDirectoryDoesNotExist() {
- // /import directory doesn't exist in test environment → IOException → IMPORT_FAILED_INTERNAL
- service.runImportAsync();
-
- assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.FAILED);
- assertThat(service.getStatus().statusCode()).isEqualTo("IMPORT_FAILED_INTERNAL");
- }
-
- @Test
- void runImportAsync_readsFromConfiguredImportDir(@TempDir Path tempDir) {
- // Empty temp dir → findSpreadsheetFile throws "no spreadsheet" with the
- // configured path in the message. Proves the field, not a constant,
- // drives the lookup.
- ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
-
- service.runImportAsync();
-
- assertThat(service.getStatus().state()).isEqualTo(MassImportService.State.FAILED);
- assertThat(service.getStatus().message()).contains(tempDir.toString());
- }
-
- @Test
- void runImportAsync_setsStatusCode_IMPORT_FAILED_NO_SPREADSHEET_whenDirIsEmpty(@TempDir Path tempDir) {
- ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
-
- service.runImportAsync();
-
- assertThat(service.getStatus().statusCode()).isEqualTo("IMPORT_FAILED_NO_SPREADSHEET");
- }
-
- @Test
- void runImportAsync_setsStatusCode_IMPORT_DONE_whenSpreadsheetHasNoDataRows(@TempDir Path tempDir) throws Exception {
- Path xlsx = tempDir.resolve("import.xlsx");
- try (XSSFWorkbook wb = new XSSFWorkbook()) {
- wb.createSheet("Sheet1");
- try (OutputStream out = Files.newOutputStream(xlsx)) {
- wb.write(out);
- }
- }
- ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
-
- service.runImportAsync();
-
- assertThat(service.getStatus().statusCode()).isEqualTo("IMPORT_DONE");
- }
-
- @Test
- void runImportAsync_throwsConflict_whenAlreadyRunning() {
- MassImportService.ImportStatus running = new MassImportService.ImportStatus(
- MassImportService.State.RUNNING, "IMPORT_RUNNING", "Running...", 0, List.of(), LocalDateTime.now());
- ReflectionTestUtils.setField(service, "currentStatus", running);
-
- assertThatThrownBy(() -> service.runImportAsync())
- .isInstanceOf(DomainException.class)
- .hasMessageContaining("already in progress");
- }
-
- // ─── importSingleDocument — skip already uploaded ─────────────────────────
-
- @Test
- void importSingleDocument_skips_whenDocumentAlreadyUploadedNotPlaceholder() {
- Document existing = Document.builder()
- .id(UUID.randomUUID())
- .originalFilename("doc001.pdf")
- .status(DocumentStatus.UPLOADED)
- .build();
- when(documentService.findByOriginalFilename("doc001.pdf")).thenReturn(Optional.of(existing));
-
- Optional result = service.importSingleDocument(minimalCells("doc001.pdf"), Optional.empty(), "doc001.pdf", "doc001");
-
- verify(documentService, never()).save(any());
- assertThat(result).isPresent().contains(MassImportService.SkipReason.ALREADY_EXISTS);
- }
-
- // ─── importSingleDocument — already-exists guard fires before file I/O ─────
-
- @Test
- void importSingleDocument_skipsWithAlreadyExists_whenDocumentUploadedAndFileIsPresent(@TempDir Path tempDir) throws Exception {
- // Document already exists with status UPLOADED (not PLACEHOLDER).
- // A physical PDF file is also present on disk (valid magic bytes).
- // Expected: ALREADY_EXISTS is returned and no S3 upload is attempted —
- // the guard fires before any file I/O, so no partial processing occurs.
- Document existing = Document.builder()
- .id(UUID.randomUUID())
- .originalFilename("present.pdf")
- .status(DocumentStatus.UPLOADED)
- .build();
- when(documentService.findByOriginalFilename("present.pdf")).thenReturn(Optional.of(existing));
-
- Path physicalFile = tempDir.resolve("present.pdf");
- byte[] pdfHeader = {0x25, 0x50, 0x44, 0x46, 0x2D}; // %PDF-
- Files.write(physicalFile, pdfHeader);
-
- Optional result = service.importSingleDocument(
- minimalCells("present.pdf"), Optional.of(physicalFile.toFile()), "present.pdf", "present");
-
- assertThat(result).isPresent().contains(MassImportService.SkipReason.ALREADY_EXISTS);
- verify(s3Client, never()).putObject(any(PutObjectRequest.class), any(RequestBody.class));
- verify(documentService, never()).save(any());
- }
-
- // ─── importSingleDocument — S3 failure surfaced in skippedFiles ──────────
-
- @Test
- void runImportAsync_addsS3UploadFailed_toSkippedFiles_whenS3Throws(@TempDir Path tempDir) throws Exception {
- byte[] pdfHeader = {0x25, 0x50, 0x44, 0x46, 0x2D}; // %PDF-
- Files.write(tempDir.resolve("upload_fail.pdf"), pdfHeader);
- buildMinimalImportXlsx(tempDir, "upload_fail.pdf");
- ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
- when(documentService.findByOriginalFilename("upload_fail.pdf")).thenReturn(Optional.empty());
- doThrow(new RuntimeException("S3 unavailable"))
- .when(s3Client).putObject(any(PutObjectRequest.class), any(RequestBody.class));
-
- service.runImportAsync();
-
- assertThat(service.getStatus().skipped()).isEqualTo(1);
- assertThat(service.getStatus().skippedFiles())
- .extracting(MassImportService.SkippedFile::filename, MassImportService.SkippedFile::reason)
- .containsExactly(org.assertj.core.groups.Tuple.tuple("upload_fail.pdf", MassImportService.SkipReason.S3_UPLOAD_FAILED));
- }
-
- @Test
- void runImportAsync_addsAlreadyExists_toSkippedFiles_whenDocumentAlreadyUploaded(@TempDir Path tempDir) throws Exception {
- buildMinimalImportXlsx(tempDir, "existing.pdf");
- ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
- Document existing = Document.builder()
- .id(UUID.randomUUID())
- .originalFilename("existing.pdf")
- .status(DocumentStatus.UPLOADED)
- .build();
- when(documentService.findByOriginalFilename("existing.pdf")).thenReturn(Optional.of(existing));
-
- service.runImportAsync();
-
- assertThat(service.getStatus().skipped()).isEqualTo(1);
- assertThat(service.getStatus().skippedFiles())
- .extracting(MassImportService.SkippedFile::reason)
- .containsExactly(MassImportService.SkipReason.ALREADY_EXISTS);
- }
-
- // ─── importSingleDocument — create new document (metadata only) ───────────
-
- @Test
- void importSingleDocument_createsNewDocument_whenNotExists() {
- when(documentService.findByOriginalFilename("doc002.pdf")).thenReturn(Optional.empty());
- when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
-
- service.importSingleDocument(minimalCells("doc002.pdf"), Optional.empty(), "doc002.pdf", "doc002");
-
- verify(documentService).save(argThat(d ->
- d.getOriginalFilename().equals("doc002.pdf")
- && d.getStatus() == DocumentStatus.PLACEHOLDER));
- }
-
- // ─── importSingleDocument — update existing placeholder ──────────────────
-
- @Test
- void importSingleDocument_updatesExistingPlaceholder() {
- Document placeholder = Document.builder()
- .id(UUID.randomUUID())
- .originalFilename("existing.pdf")
- .status(DocumentStatus.PLACEHOLDER)
- .build();
- when(documentService.findByOriginalFilename("existing.pdf")).thenReturn(Optional.of(placeholder));
- when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
-
- service.importSingleDocument(minimalCells("existing.pdf"), Optional.empty(), "existing.pdf", "existing");
-
- verify(documentService).save(same(placeholder));
- }
-
- // ─── importSingleDocument — with file (S3 upload) ─────────────────────────
-
- @Test
- void importSingleDocument_uploadsFileToS3_andSetsStatusUploaded(@TempDir Path tempDir) throws Exception {
- Path tempFile = tempDir.resolve("doc003.pdf");
- Files.write(tempFile, "PDF content".getBytes());
-
- when(documentService.findByOriginalFilename("doc003.pdf")).thenReturn(Optional.empty());
- when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
-
- service.importSingleDocument(
- minimalCells("doc003.pdf"), Optional.of(tempFile.toFile()), "doc003.pdf", "doc003");
-
- verify(s3Client).putObject(any(PutObjectRequest.class), any(RequestBody.class));
- verify(documentService).save(argThat(d -> d.getStatus() == DocumentStatus.UPLOADED));
- }
-
- @Test
- void importSingleDocument_returnsS3UploadFailed_whenS3UploadFails(@TempDir Path tempDir) throws Exception {
- Path tempFile = tempDir.resolve("fail.pdf");
- Files.write(tempFile, "data".getBytes());
-
- when(documentService.findByOriginalFilename("fail.pdf")).thenReturn(Optional.empty());
- doThrow(new RuntimeException("S3 error"))
- .when(s3Client).putObject(any(PutObjectRequest.class), any(RequestBody.class));
-
- Optional result = service.importSingleDocument(
- minimalCells("fail.pdf"), Optional.of(tempFile.toFile()), "fail.pdf", "fail");
-
- verify(documentService, never()).save(any());
- assertThat(result).isPresent().contains(MassImportService.SkipReason.S3_UPLOAD_FAILED);
- }
-
- // ─── importSingleDocument — sender handling ───────────────────────────────
-
- @Test
- void importSingleDocument_setsNullSender_whenSenderCellIsBlank() {
- when(documentService.findByOriginalFilename("nosender.pdf")).thenReturn(Optional.empty());
- when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
-
- List cells = buildCells("nosender.pdf", "", "", "");
- service.importSingleDocument(cells, Optional.empty(), "nosender.pdf", "nosender");
-
- verify(documentService).save(argThat(d -> d.getSender() == null));
- verify(personService, never()).findOrCreateByAlias(any());
- }
-
- @Test
- void importSingleDocument_createsSender_whenSenderCellIsNonBlank() {
- Person sender = Person.builder().id(UUID.randomUUID()).firstName("Walter").lastName("Müller").build();
- when(documentService.findByOriginalFilename("withsender.pdf")).thenReturn(Optional.empty());
- when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
- when(personService.findOrCreateByAlias("Walter Müller")).thenReturn(sender);
-
- List cells = buildCells("withsender.pdf", "Walter Müller", "", "");
- service.importSingleDocument(cells, Optional.empty(), "withsender.pdf", "withsender");
-
- verify(personService).findOrCreateByAlias("Walter Müller");
- verify(documentService).save(argThat(d -> d.getSender() == sender));
- }
-
- // ─── importSingleDocument — tag handling ─────────────────────────────────
-
- @Test
- void importSingleDocument_createsTag_whenTagCellIsNonBlank() {
- Tag tag = Tag.builder().id(UUID.randomUUID()).name("Familie").build();
- when(documentService.findByOriginalFilename("tagged.pdf")).thenReturn(Optional.empty());
- when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
- when(tagService.findOrCreate("Familie")).thenReturn(tag);
-
- List cells = buildCells("tagged.pdf", "", "", "Familie");
- service.importSingleDocument(cells, Optional.empty(), "tagged.pdf", "tagged");
-
- verify(tagService).findOrCreate("Familie");
- }
-
- @Test
- void importSingleDocument_doesNotCreateTag_whenTagCellIsBlank() {
- when(documentService.findByOriginalFilename("notag.pdf")).thenReturn(Optional.empty());
- when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
-
- List cells = buildCells("notag.pdf", "", "", "");
- service.importSingleDocument(cells, Optional.empty(), "notag.pdf", "notag");
-
- verify(tagService, never()).findOrCreate(any());
- }
-
- // ─── importSingleDocument — metadataComplete heuristic ───────────────────
-
- @Test
- void importSingleDocument_metadataComplete_whenSenderPresent() {
- Person sender = Person.builder().id(UUID.randomUUID()).firstName("A").lastName("B").build();
- when(documentService.findByOriginalFilename("meta.pdf")).thenReturn(Optional.empty());
- when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
- when(personService.findOrCreateByAlias("A B")).thenReturn(sender);
-
- List cells = buildCells("meta.pdf", "A B", "", "");
- service.importSingleDocument(cells, Optional.empty(), "meta.pdf", "meta");
-
- verify(documentService).save(argThat(Document::isMetadataComplete));
- }
-
- @Test
- void importSingleDocument_metadataIncomplete_whenNoKeyFieldsPresent() {
- when(documentService.findByOriginalFilename("nometa.pdf")).thenReturn(Optional.empty());
- when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
-
- List cells = buildCells("nometa.pdf", "", "", "");
- service.importSingleDocument(cells, Optional.empty(), "nometa.pdf", "nometa");
-
- verify(documentService).save(argThat(d -> !d.isMetadataComplete()));
- }
-
- // ─── importSingleDocument — blank fields set to null ─────────────────────
-
- @Test
- void importSingleDocument_setsBlankFieldsToNull() {
- when(documentService.findByOriginalFilename("blank.pdf")).thenReturn(Optional.empty());
- when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
-
- List cells = buildCells("blank.pdf", "", "", "");
- service.importSingleDocument(cells, Optional.empty(), "blank.pdf", "blank");
-
- verify(documentService).save(argThat(d ->
- d.getLocation() == null &&
- d.getSummary() == null &&
- d.getTranscription() == null &&
- d.getArchiveBox() == null &&
- d.getArchiveFolder() == null));
- }
-
- // ─── processRows — via ReflectionTestUtils ────────────────────────────────
-
- @Test
- void processRows_returnsZero_whenOnlyHeaderRow() {
- List> rows = List.of(List.of("header", "col1"));
- MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
- assertThat(result.processed()).isEqualTo(0);
- }
-
- @Test
- void processRows_skipsRowWithBlankIndex() {
- List> rows = List.of(
- List.of("header"),
- minimalCells("") // blank index
- );
- MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
- assertThat(result.processed()).isEqualTo(0);
- verify(documentService, never()).findByOriginalFilename(any());
- }
-
- @Test
- void processRows_addsExtension_whenIndexHasNoDot() {
- when(documentService.findByOriginalFilename("doc001.pdf")).thenReturn(Optional.empty());
- when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
-
- List> rows = List.of(
- List.of("header"),
- minimalCells("doc001") // no dot → appends ".pdf"
- );
- MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
-
- assertThat(result.processed()).isEqualTo(1);
- verify(documentService).findByOriginalFilename("doc001.pdf");
- }
-
- @Test
- void processRows_usesFilenameAsIs_whenIndexHasDot() {
- when(documentService.findByOriginalFilename("doc002.pdf")).thenReturn(Optional.empty());
- when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
-
- List> rows = List.of(
- List.of("header"),
- minimalCells("doc002.pdf") // has dot → used as-is
- );
- MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
-
- assertThat(result.processed()).isEqualTo(1);
- verify(documentService).findByOriginalFilename("doc002.pdf");
- }
-
- // ─── isValidImportFilename — security regression — do not remove ─────────
-
- @Test
- void isValidImportFilename_returnsFalse_whenFilenameIsNull() {
- boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", (String) null);
- assertThat(result).isFalse();
- }
-
- @Test
- void isValidImportFilename_returnsFalse_whenFilenameIsBlank() {
- boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", " ");
- assertThat(result).isFalse();
- }
-
- @Test
- void isValidImportFilename_returnsFalse_whenFilenameContainsForwardSlash() {
- boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "etc/passwd");
- assertThat(result).isFalse();
- }
-
- @Test
- void isValidImportFilename_returnsFalse_whenFilenameContainsBackslash() {
- boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "..\\etc\\passwd");
- assertThat(result).isFalse();
- }
-
- @Test
- void isValidImportFilename_returnsFalse_whenFilenameContainsDotDot() {
- boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "doc..evil.pdf");
- assertThat(result).isFalse();
- }
-
- @Test
- void isValidImportFilename_returnsFalse_whenFilenameIsDotDot() {
- boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "..");
- assertThat(result).isFalse();
- }
-
- @Test
- void isValidImportFilename_returnsFalse_whenFilenameIsAbsolutePath() {
- boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "/etc/passwd");
- assertThat(result).isFalse();
- }
-
- @Test
- void isValidImportFilename_returnsFalse_whenFilenameContainsNullByte() {
- boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "file\0.pdf");
- assertThat(result).isFalse();
- }
-
- @Test
- void isValidImportFilename_returnsTrue_whenFilenameIsPlainBasename() {
- boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "document.pdf");
- assertThat(result).isTrue();
- }
-
- @Test
- void isValidImportFilename_returnsFalse_whenFilenameContainsUnicodeDivisionSlash() {
- boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "foo∕bar.pdf");
- assertThat(result).isFalse();
- }
-
- @Test
- void isValidImportFilename_returnsFalse_whenFilenameContainsFullwidthSlash() {
- boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "foo/bar.pdf");
- assertThat(result).isFalse();
- }
-
- @Test
- void isValidImportFilename_returnsFalse_whenFilenameContainsUnicodeReverseSolidus() {
- boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "foo⧵bar.pdf");
- assertThat(result).isFalse();
- }
-
- @Test
- void isValidImportFilename_returnsTrue_whenFilenameHasLeadingDot() {
- boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", ".hidden.pdf");
- assertThat(result).isTrue();
- }
-
- @Test
- void isValidImportFilename_returnsTrue_whenFilenameHasSpaces() {
- boolean result = ReflectionTestUtils.invokeMethod(service, "isValidImportFilename", "Brief an Oma.pdf");
- assertThat(result).isTrue();
- }
-
- @Test
- void processRows_skipsRowAndContinues_whenFilenameIsPathTraversal() {
- when(documentService.findByOriginalFilename("legitimate.pdf")).thenReturn(Optional.empty());
- when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
-
- List> rows = List.of(
- List.of("header"),
- minimalCells("../evil"), // row 1: path traversal — should be skipped
- minimalCells("legitimate.pdf") // row 2: valid — should be processed
- );
- MassImportService.ProcessResult result = ReflectionTestUtils.invokeMethod(service, "processRows", rows);
-
- assertThat(result.processed()).isEqualTo(1);
- assertThat(result.skippedFiles())
- .extracting(MassImportService.SkippedFile::reason)
- .containsExactly(MassImportService.SkipReason.INVALID_FILENAME_PATH_TRAVERSAL);
- }
-
- // ─── importSingleDocument — non-blank optional fields ────────────────────
-
- @Test
- void importSingleDocument_setsNonNullOptionalFields_whenPresent() {
- when(documentService.findByOriginalFilename("rich.pdf")).thenReturn(Optional.empty());
- when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
-
- // box=1, folder=2, location=9, summary=11, transcription=13
- List cells = List.of(
- "rich.pdf", // 0: index
- "Box A", // 1: box
- "Folder B", // 2: folder
- "", // 3: sender
- "", // 4: unused
- "", // 5: receivers
- "", // 6: unused
- "", // 7: date
- "", // 8: unused
- "Hamburg", // 9: location
- "", // 10: tags
- "A summary", // 11: summary
- "", // 12: unused
- "A transcript" // 13: transcription
- );
-
- service.importSingleDocument(cells, Optional.empty(), "rich.pdf", "rich");
-
- verify(documentService).save(argThat(d ->
- "Box A".equals(d.getArchiveBox()) &&
- "Folder B".equals(d.getArchiveFolder()) &&
- "Hamburg".equals(d.getLocation()) &&
- "A summary".equals(d.getSummary()) &&
- "A transcript".equals(d.getTranscription())));
- }
-
- @Test
- void importSingleDocument_setsMetadataComplete_whenReceiversArePresent() {
- Person receiver = Person.builder().id(UUID.randomUUID()).firstName("Walter").lastName("Müller").build();
- when(documentService.findByOriginalFilename("rcv.pdf")).thenReturn(Optional.empty());
- when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
- when(personService.findOrCreateByAlias("Walter Müller")).thenReturn(receiver);
-
- List cells = List.of(
- "rcv.pdf", "", "", "", "", "Walter Müller", "", "", "", "", "", "", "", "");
- service.importSingleDocument(cells, Optional.empty(), "rcv.pdf", "rcv");
-
- verify(documentService).save(argThat(Document::isMetadataComplete));
- }
-
- @Test
- void importSingleDocument_setsMetadataComplete_whenDateIsPresent() {
- when(documentService.findByOriginalFilename("dated.pdf")).thenReturn(Optional.empty());
- when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
-
- List cells = List.of(
- "dated.pdf", "", "", "", "", "", "", "2024-03-15", "", "", "", "", "", "");
- service.importSingleDocument(cells, Optional.empty(), "dated.pdf", "dated");
-
- verify(documentService).save(argThat(Document::isMetadataComplete));
- }
-
- // ─── buildTitle — null location ───────────────────────────────────────────
-
- @Test
- void buildTitle_withNullLocation_skipsLocationPart() {
- String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
- "doc005", LocalDate.of(1940, 5, 1), (String) null);
- assertThat(result).contains("doc005").contains("1940");
- assertThat(result).doesNotContain("Berlin");
- }
-
- // ─── parseDate — via ReflectionTestUtils ─────────────────────────────────
-
- @Test
- void parseDate_returnsNull_whenValueIsNull() {
- LocalDate result = ReflectionTestUtils.invokeMethod(service, "parseDate", (String) null);
- assertThat(result).isNull();
- }
-
- @Test
- void parseDate_returnsNull_whenValueIsBlank() {
- LocalDate result = ReflectionTestUtils.invokeMethod(service, "parseDate", " ");
- assertThat(result).isNull();
- }
-
- @Test
- void parseDate_returnsDate_whenValidIsoFormat() {
- LocalDate result = ReflectionTestUtils.invokeMethod(service, "parseDate", "2024-03-15");
- assertThat(result).isEqualTo(LocalDate.of(2024, 3, 15));
- }
-
- @Test
- void parseDate_returnsNull_whenInvalidDateString() {
- LocalDate result = ReflectionTestUtils.invokeMethod(service, "parseDate", "15.03.2024");
- assertThat(result).isNull();
- }
-
- // ─── buildTitle — via ReflectionTestUtils ────────────────────────────────
-
- @Test
- void buildTitle_withDateAndLocation() {
- String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
- "doc001", LocalDate.of(1940, 5, 1), "Berlin");
- assertThat(result).contains("doc001").contains("Berlin").contains("1940");
- }
-
- @Test
- void buildTitle_withDateOnly() {
- String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
- "doc002", LocalDate.of(1960, 8, 15), "");
- assertThat(result).contains("doc002").contains("1960");
- assertThat(result).doesNotContain("Berlin");
- }
-
- @Test
- void buildTitle_withIndexOnly_whenDateAndLocationAreNull() {
- String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
- "doc003", null, "");
- assertThat(result).isEqualTo("doc003");
- }
-
- @Test
- void buildTitle_withLocationOnly_whenDateIsNull() {
- // date=null, location present → date part skipped, location appended
- String result = ReflectionTestUtils.invokeMethod(service, "buildTitle",
- "doc004", null, "Berlin");
- assertThat(result).contains("doc004").contains("Berlin");
- assertThat(result).doesNotContain("("); // no date part
- }
-
- // ─── getCell — via ReflectionTestUtils ───────────────────────────────────
-
- @Test
- void getCell_returnsEmptyString_whenColBeyondListSize() {
- List cells = List.of("a", "b");
- String result = ReflectionTestUtils.invokeMethod(service, "getCell", cells, 5);
- assertThat(result).isEmpty();
- }
-
- @Test
- void getCell_returnsEmptyString_whenValueIsNull() {
- List cells = new ArrayList<>();
- cells.add(null);
- cells.add("b");
- String result = ReflectionTestUtils.invokeMethod(service, "getCell", cells, 0);
- assertThat(result).isEmpty();
- }
-
- @Test
- void getCell_returnsTrimmedValue() {
- List cells = List.of(" hello ", "world");
- String result = ReflectionTestUtils.invokeMethod(service, "getCell", cells, 0);
- assertThat(result).isEqualTo("hello");
- }
-
- // ─── PDF magic byte validation regression ─────────────────────────────────
-
- @Test
- void runImportAsync_uploadsValidPdf_andSkipsFakeOne(@TempDir Path tempDir) throws Exception {
- setupOneValidOneFakeImport(tempDir);
-
- service.runImportAsync();
-
- verify(s3Client, times(1)).putObject(any(PutObjectRequest.class), any(RequestBody.class));
- }
-
- @Test
- void runImportAsync_setsSkippedCount_toOne_whenOneFakeFile(@TempDir Path tempDir) throws Exception {
- setupOneValidOneFakeImport(tempDir);
-
- service.runImportAsync();
-
- assertThat(service.getStatus().skipped()).isEqualTo(1);
- }
-
- @Test
- void runImportAsync_includesRejectedFilename_inSkippedFiles(@TempDir Path tempDir) throws Exception {
- setupOneValidOneFakeImport(tempDir);
-
- service.runImportAsync();
-
- assertThat(service.getStatus().skippedFiles())
- .extracting(MassImportService.SkippedFile::filename)
- .contains("fake.pdf");
- }
-
- @Test
- void runImportAsync_skipsFile_whenShorterThanFourBytes(@TempDir Path tempDir) throws Exception {
- Files.write(tempDir.resolve("tiny.pdf"), new byte[]{0x25, 0x50, 0x44}); // only 3 bytes
- buildMinimalImportXlsx(tempDir, "tiny.pdf");
- ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
- lenient().when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
-
- service.runImportAsync();
-
- assertThat(service.getStatus().skipped()).isEqualTo(1);
- }
-
- @Test
- void runImportAsync_skipsFile_whenMagicBytesCheckThrowsIOException(@TempDir Path tempDir) throws Exception {
- Files.writeString(tempDir.resolve("unreadable.pdf"), "some content");
- buildMinimalImportXlsx(tempDir, "unreadable.pdf");
- ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
- lenient().when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
-
- MassImportService spyService = spy(service);
- doThrow(new java.io.IOException("simulated read error")).when(spyService).openFileStream(any(File.class));
-
- spyService.runImportAsync();
-
- assertThat(spyService.getStatus().skipped()).isEqualTo(1);
- assertThat(spyService.getStatus().skippedFiles())
- .extracting(MassImportService.SkippedFile::reason)
- .containsExactly(MassImportService.SkipReason.FILE_READ_ERROR);
- }
-
- // ─── findFileRecursive — symlink escape security regression — do not remove ─
-
- @Test
- void findFileRecursive_throwsDomainException_whenSymlinkEscapesImportDir(
- @TempDir Path importDirPath, @TempDir Path outsideDir) throws Exception {
- Path outsideFile = outsideDir.resolve("secret.pdf");
- Files.writeString(outsideFile, "sensitive content");
- Files.createSymbolicLink(importDirPath.resolve("secret.pdf"), outsideFile);
-
- ReflectionTestUtils.setField(service, "importDir", importDirPath.toString());
-
- assertThatThrownBy(() -> ReflectionTestUtils.invokeMethod(service, "findFileRecursive", "secret.pdf"))
- .isInstanceOf(DomainException.class);
- }
-
- // ─── readOds — XXE security regression ───────────────────────────────────
-
- // Security regression — do not remove.
- @Test
- void readOds_rejects_xxe_doctype_payload(@TempDir Path tempDir) throws Exception {
- File malicious = buildXxeOds(tempDir, "file:///etc/hostname");
- assertThatThrownBy(() -> service.readOds(malicious))
- .isInstanceOf(SAXParseException.class)
- .hasMessageContaining("DOCTYPE is disallowed");
- }
-
- @Test
- void readOds_parses_valid_ods_correctly(@TempDir Path tempDir) throws Exception {
- File valid = buildValidOds(tempDir, "Mustermann");
- List> rows = service.readOds(valid);
- assertThat(rows).isNotEmpty();
- assertThat(rows.get(0)).contains("Mustermann");
- }
-
- // ─── helpers ──────────────────────────────────────────────────────────────
-
- /**
- * Builds a minimal 14-element cell row with the given filename at index 0
- * and blanks for all optional fields.
- */
- private List minimalCells(String filename) {
- return buildCells(filename, "", "", "");
- }
-
- /**
- * Builds a cell row with sender, receiver, and tag controls.
- * Layout matches the default column indices set in setUp().
- */
- private List buildCells(String filename, String sender, String receivers, String tag) {
- // 14 elements: index=0,box=1,folder=2,sender=3,[4],receivers=5,[6],date=7,[8],location=9,tag=10,summary=11,[12],transcription=13
- return List.of(
- filename, // 0: index
- "", // 1: box
- "", // 2: folder
- sender, // 3: sender
- "", // 4: (unused)
- receivers, // 5: receivers
- "", // 6: (unused)
- "", // 7: date
- "", // 8: (unused)
- "", // 9: location
- tag, // 10: tags
- "", // 11: summary
- "", // 12: (unused)
- "" // 13: transcription
- );
- }
-
- /** Creates a minimal ODS ZIP containing a content.xml with an XXE payload. */
- private File buildXxeOds(Path dir, String entityTarget) throws Exception {
- String xml = ""
- + "]>"
- + ""
- + ""
- + ""
- + "&xxe;"
- + ""
- + ""
- + "";
- return writeOdsZip(dir.resolve("malicious.ods"), xml);
- }
-
- /** Creates a minimal valid ODS ZIP containing a content.xml with the given cell value.
- * cellValue must not contain XML metacharacters ({@code < > &}). */
- private File buildValidOds(Path dir, String cellValue) throws Exception {
- String xml = ""
- + ""
- + ""
- + ""
- + "" + cellValue + ""
- + ""
- + ""
- + "";
- return writeOdsZip(dir.resolve("valid.ods"), xml);
- }
-
- private File writeOdsZip(Path destination, String contentXml) throws Exception {
- try (OutputStream fos = Files.newOutputStream(destination);
- ZipOutputStream zip = new ZipOutputStream(fos)) {
- zip.putNextEntry(new ZipEntry("content.xml"));
- zip.write(contentXml.getBytes(StandardCharsets.UTF_8));
- zip.closeEntry();
- }
- return destination.toFile();
- }
-
- private void setupOneValidOneFakeImport(Path tempDir) throws Exception {
- byte[] pdfHeader = {0x25, 0x50, 0x44, 0x46, 0x2D}; // %PDF-
- Files.write(tempDir.resolve("real.pdf"), pdfHeader);
- Files.writeString(tempDir.resolve("fake.pdf"), "not a pdf");
- buildMinimalImportXlsx(tempDir, "real.pdf", "fake.pdf");
- ReflectionTestUtils.setField(service, "importDir", tempDir.toString());
- when(documentService.findByOriginalFilename(any())).thenReturn(Optional.empty());
- when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
- }
-
- private void buildMinimalImportXlsx(Path dir, String... filenames) throws Exception {
- Path xlsx = dir.resolve("import.xlsx");
- try (XSSFWorkbook wb = new XSSFWorkbook()) {
- org.apache.poi.ss.usermodel.Sheet sheet = wb.createSheet("Sheet1");
- sheet.createRow(0).createCell(0).setCellValue("Index");
- for (int i = 0; i < filenames.length; i++) {
- sheet.createRow(i + 1).createCell(0).setCellValue(filenames[i]);
- }
- try (OutputStream out = Files.newOutputStream(xlsx)) {
- wb.write(out);
- }
- }
- }
-}
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/PersonRegisterImporterTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/PersonRegisterImporterTest.java
new file mode 100644
index 00000000..af5740c0
--- /dev/null
+++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/PersonRegisterImporterTest.java
@@ -0,0 +1,141 @@
+package org.raddatz.familienarchiv.importing;
+
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.api.io.TempDir;
+import org.mockito.ArgumentCaptor;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.raddatz.familienarchiv.person.Person;
+import org.raddatz.familienarchiv.person.PersonService;
+import org.raddatz.familienarchiv.person.PersonUpsertCommand;
+
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+/**
+ * Tests for {@code PersonRegisterImporter.load(File)}: each test writes a small XLSX workbook
+ * (one header row plus data rows) into a temp directory and checks which
+ * {@link PersonUpsertCommand}s reach the mocked {@code PersonService.upsertBySourceRef}.
+ */
+@ExtendWith(MockitoExtension.class)
+class PersonRegisterImporterTest {
+
+    @Test
+    void load_upsertsPersonBySourceRef_withProvisionalFalse(@TempDir Path tempDir) throws Exception {
+        PersonService personService = mock(PersonService.class);
+        when(personService.upsertBySourceRef(any())).thenAnswer(inv -> personOf(inv.getArgument(0)));
+        Path xlsx = writePersons(tempDir, row(
+                "allemeyer-elsgard", "Allemeyer", "Elsgard", "Wöhler", "Nichte von Herbert", "False"));
+
+        new PersonRegisterImporter(personService).load(xlsx.toFile());
+
+        ArgumentCaptor<PersonUpsertCommand> captor = ArgumentCaptor.forClass(PersonUpsertCommand.class);
+        verify(personService).upsertBySourceRef(captor.capture());
+        PersonUpsertCommand cmd = captor.getValue();
+        assertThat(cmd.sourceRef()).isEqualTo("allemeyer-elsgard");
+        assertThat(cmd.lastName()).isEqualTo("Allemeyer");
+        assertThat(cmd.firstName()).isEqualTo("Elsgard");
+        assertThat(cmd.maidenName()).isEqualTo("Wöhler");
+        assertThat(cmd.notes()).isEqualTo("Nichte von Herbert");
+        assertThat(cmd.provisional()).isFalse();
+    }
+
+    @Test
+    void load_parsesCapitalisedPythonBool_True(@TempDir Path tempDir) throws Exception {
+        PersonService personService = mock(PersonService.class);
+        when(personService.upsertBySourceRef(any())).thenAnswer(inv -> personOf(inv.getArgument(0)));
+        Path xlsx = writePersons(tempDir, row(
+                "noise-geschirr", "Geschirr", "", "", "", "True"));
+
+        new PersonRegisterImporter(personService).load(xlsx.toFile());
+
+        ArgumentCaptor<PersonUpsertCommand> captor = ArgumentCaptor.forClass(PersonUpsertCommand.class);
+        verify(personService).upsertBySourceRef(captor.capture());
+        assertThat(captor.getValue().provisional()).isTrue();
+    }
+
+    @Test
+    void load_skipsRowWithBlankPersonId(@TempDir Path tempDir) throws Exception {
+        PersonService personService = mock(PersonService.class);
+        Path xlsx = writePersons(tempDir, row("", "NoId", "", "", "", "False"));
+
+        new PersonRegisterImporter(personService).load(xlsx.toFile());
+
+        verify(personService, times(0)).upsertBySourceRef(any());
+    }
+
+    @Test
+    void load_returnsCountOfProcessedRows(@TempDir Path tempDir) throws Exception {
+        PersonService personService = mock(PersonService.class);
+        when(personService.upsertBySourceRef(any())).thenAnswer(inv -> personOf(inv.getArgument(0)));
+        Path xlsx = writePersons(tempDir,
+                row("a-one", "One", "A", "", "", "False"),
+                row("a-two", "Two", "B", "", "", "False"));
+
+        int processed = new PersonRegisterImporter(personService).load(xlsx.toFile());
+
+        assertThat(processed).isEqualTo(2);
+    }
+
+    /** Builds a {@link Person} with a random id that copies sourceRef, names and the provisional flag from {@code cmd}. */
+    private static Person personOf(PersonUpsertCommand cmd) {
+        return Person.builder().id(UUID.randomUUID()).sourceRef(cmd.sourceRef())
+                .firstName(cmd.firstName()).lastName(cmd.lastName())
+                .provisional(cmd.provisional()).build();
+    }
+
+    /** Builds one data row as an insertion-ordered map keyed by the column names that {@code writePersons} uses as headers. */
+    private Map<String, String> row(String personId, String lastName, String firstName,
+                                    String maidenName, String notes, String provisional) {
+        Map<String, String> r = new LinkedHashMap<>();
+        r.put("person_id", personId);
+        r.put("last_name", lastName);
+        r.put("first_name", firstName);
+        r.put("maiden_name", maidenName);
+        r.put("notes", notes);
+        r.put("provisional", provisional);
+        return r;
+    }
+
+    /**
+     * Writes {@code canonical-persons.xlsx} into {@code dir}: a header row followed by one sheet row
+     * per map. A column that is missing from a map is written as an empty string.
+     */
+    @SafeVarargs
+    private Path writePersons(Path dir, Map<String, String>... rows) throws Exception {
+        Path xlsx = dir.resolve("canonical-persons.xlsx");
+        List<String> headers = List.of("person_id", "last_name", "first_name", "maiden_name", "notes", "provisional");
+        try (XSSFWorkbook wb = new XSSFWorkbook()) {
+            Sheet sheet = wb.createSheet("Sheet1");
+            Row header = sheet.createRow(0);
+            for (int i = 0; i < headers.size(); i++) {
+                header.createCell(i).setCellValue(headers.get(i));
+            }
+            for (int r = 0; r < rows.length; r++) {
+                Row row = sheet.createRow(r + 1);
+                for (int c = 0; c < headers.size(); c++) {
+                    row.createCell(c).setCellValue(rows[r].getOrDefault(headers.get(c), ""));
+                }
+            }
+            try (OutputStream out = Files.newOutputStream(xlsx)) {
+                wb.write(out);
+            }
+        }
+        return xlsx;
+    }
+}
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/PersonTreeImporterTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/PersonTreeImporterTest.java
new file mode 100644
index 00000000..ce90d260
--- /dev/null
+++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/PersonTreeImporterTest.java
@@ -0,0 +1,170 @@
+package org.raddatz.familienarchiv.importing;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.api.io.TempDir;
+import org.mockito.ArgumentCaptor;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.raddatz.familienarchiv.exception.DomainException;
+import org.raddatz.familienarchiv.exception.ErrorCode;
+import org.raddatz.familienarchiv.person.Person;
+import org.raddatz.familienarchiv.person.PersonService;
+import org.raddatz.familienarchiv.person.PersonUpsertCommand;
+import org.raddatz.familienarchiv.person.relationship.RelationType;
+import org.raddatz.familienarchiv.person.relationship.RelationshipService;
+import org.raddatz.familienarchiv.person.relationship.dto.CreateRelationshipRequest;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.UUID;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+/**
+ * Tests for {@code PersonTreeImporter.load(File)}: each test writes a JSON document with a
+ * {@code persons} and a {@code relationships} array into a temp directory and verifies the calls
+ * made to the mocked {@link PersonService} and {@link RelationshipService}.
+ */
+@ExtendWith(MockitoExtension.class)
+class PersonTreeImporterTest {
+
+    @Test
+    void load_upsertsTreePersonBySourceRef_withFamilyMemberFlag(@TempDir Path tempDir) throws Exception {
+        PersonService personService = mock(PersonService.class);
+        RelationshipService relationshipService = mock(RelationshipService.class);
+        when(personService.upsertBySourceRef(any())).thenAnswer(inv -> personOf(inv.getArgument(0)));
+        Path json = write(tempDir, """
+                {"persons":[
+                {"rowId":"row_002","firstName":"Elsgard","lastName":"Allemeyer","maidenName":"Wöhler",
+                "notes":"Nichte","birthYear":1920,"deathYear":1999,"familyMember":true,"personId":"allemeyer-elsgard"}
+                ],"relationships":[]}
+                """);
+
+        new PersonTreeImporter(personService, relationshipService)
+                .load(json.toFile());
+
+        ArgumentCaptor<PersonUpsertCommand> captor = ArgumentCaptor.forClass(PersonUpsertCommand.class);
+        verify(personService).upsertBySourceRef(captor.capture());
+        PersonUpsertCommand cmd = captor.getValue();
+        assertThat(cmd.sourceRef()).isEqualTo("allemeyer-elsgard");
+        assertThat(cmd.familyMember()).isTrue();
+        assertThat(cmd.provisional()).isFalse();
+    }
+
+    @Test
+    void load_createsRelationship_resolvingRowIdsToUpsertedPersons(@TempDir Path tempDir) throws Exception {
+        PersonService personService = mock(PersonService.class);
+        RelationshipService relationshipService = mock(RelationshipService.class);
+        UUID idA = UUID.randomUUID();
+        UUID idB = UUID.randomUUID();
+        when(personService.upsertBySourceRef(any())).thenAnswer(inv -> {
+            PersonUpsertCommand c = inv.getArgument(0);
+            return Person.builder().id(c.sourceRef().equals("a") ? idA : idB)
+                    .sourceRef(c.sourceRef()).lastName(c.lastName()).build();
+        });
+        Path json = write(tempDir, """
+                {"persons":[
+                {"rowId":"row_a","lastName":"A","familyMember":true,"personId":"a"},
+                {"rowId":"row_b","lastName":"B","familyMember":true,"personId":"b"}
+                ],"relationships":[
+                {"personId":"row_a","relatedPersonId":"row_b","type":"SPOUSE_OF","source":"verheiratet_mit"}
+                ]}
+                """);
+
+        new PersonTreeImporter(personService, relationshipService)
+                .load(json.toFile());
+
+        ArgumentCaptor<CreateRelationshipRequest> captor = ArgumentCaptor.forClass(CreateRelationshipRequest.class);
+        verify(relationshipService).addRelationship(eq(idA), captor.capture());
+        assertThat(captor.getValue().relatedPersonId()).isEqualTo(idB);
+        assertThat(captor.getValue().relationType()).isEqualTo(RelationType.SPOUSE_OF);
+    }
+
+    @Test
+    void load_swallowsDuplicateRelationship_forIdempotentReimport(@TempDir Path tempDir) throws Exception {
+        PersonService personService = mock(PersonService.class);
+        RelationshipService relationshipService = mock(RelationshipService.class);
+        when(personService.upsertBySourceRef(any()))
+                .thenAnswer(inv -> personOf(inv.getArgument(0)));
+        doThrow(DomainException.conflict(ErrorCode.DUPLICATE_RELATIONSHIP, "exists"))
+                .when(relationshipService).addRelationship(any(), any());
+        Path json = write(tempDir, """
+                {"persons":[
+                {"rowId":"row_a","lastName":"A","familyMember":true,"personId":"a"},
+                {"rowId":"row_b","lastName":"B","familyMember":true,"personId":"b"}
+                ],"relationships":[
+                {"personId":"row_a","relatedPersonId":"row_b","type":"SPOUSE_OF","source":"verheiratet_mit"}
+                ]}
+                """);
+
+        PersonTreeImporter importer = new PersonTreeImporter(personService, relationshipService);
+
+        // Must not propagate the conflict — re-import is idempotent.
+        importer.load(json.toFile());
+
+        verify(relationshipService).addRelationship(any(), any());
+    }
+
+    @Test
+    void load_propagatesUnexpectedDomainException_fromAddRelationship(@TempDir Path tempDir) throws Exception {
+        PersonService personService = mock(PersonService.class);
+        RelationshipService relationshipService = mock(RelationshipService.class);
+        when(personService.upsertBySourceRef(any()))
+                .thenAnswer(inv -> personOf(inv.getArgument(0)));
+        // An unexpected ErrorCode (not DUPLICATE/CIRCULAR) must NOT be swallowed.
+        doThrow(DomainException.internal(ErrorCode.INTERNAL_ERROR, "boom"))
+                .when(relationshipService).addRelationship(any(), any());
+        Path json = write(tempDir, """
+                {"persons":[
+                {"rowId":"row_a","lastName":"A","familyMember":true,"personId":"a"},
+                {"rowId":"row_b","lastName":"B","familyMember":true,"personId":"b"}
+                ],"relationships":[
+                {"personId":"row_a","relatedPersonId":"row_b","type":"SPOUSE_OF","source":"verheiratet_mit"}
+                ]}
+                """);
+
+        PersonTreeImporter importer = new PersonTreeImporter(personService, relationshipService);
+
+        assertThatThrownBy(() -> importer.load(json.toFile()))
+                .isInstanceOf(DomainException.class)
+                .extracting("code").isEqualTo(ErrorCode.INTERNAL_ERROR);
+    }
+
+    @Test
+    void load_skipsRelationship_whenRowIdUnresolved(@TempDir Path tempDir) throws Exception {
+        PersonService personService = mock(PersonService.class);
+        RelationshipService relationshipService = mock(RelationshipService.class);
+        when(personService.upsertBySourceRef(any())).thenAnswer(inv -> personOf(inv.getArgument(0)));
+        Path json = write(tempDir, """
+                {"persons":[
+                {"rowId":"row_a","lastName":"A","familyMember":true,"personId":"a"}
+                ],"relationships":[
+                {"personId":"row_a","relatedPersonId":"row_ghost","type":"SPOUSE_OF","source":"x"}
+                ]}
+                """);
+
+        new PersonTreeImporter(personService, relationshipService)
+                .load(json.toFile());
+
+        verify(relationshipService, org.mockito.Mockito.never()).addRelationship(any(), any());
+    }
+
+    /** Builds a {@link Person} with a random id that carries the command's sourceRef and lastName. */
+    private static Person personOf(PersonUpsertCommand cmd) {
+        return Person.builder().id(UUID.randomUUID()).sourceRef(cmd.sourceRef()).lastName(cmd.lastName()).build();
+    }
+
+    /** Writes {@code json} to {@code canonical-persons-tree.json} inside {@code dir} and returns that path. */
+    private Path write(Path dir, String json) throws Exception {
+        Path file = dir.resolve("canonical-persons-tree.json");
+        Files.writeString(file, json);
+        return file;
+    }
+}
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/TagTreeImporterTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/TagTreeImporterTest.java
new file mode 100644
index 00000000..e6becae5
--- /dev/null
+++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/TagTreeImporterTest.java
@@ -0,0 +1,103 @@
+package org.raddatz.familienarchiv.importing;
+
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.api.io.TempDir;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.raddatz.familienarchiv.tag.Tag;
+import org.raddatz.familienarchiv.tag.TagService;
+
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.UUID;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.ArgumentMatchers.isNull;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+@ExtendWith(MockitoExtension.class)
+class TagTreeImporterTest {
+
+ @Test
+ void load_upsertsRootTagWithNullParent(@TempDir Path tempDir) throws Exception {
+ TagService tagService = mock(TagService.class);
+ when(tagService.upsertBySourceRef(any(), any(), any()))
+ .thenAnswer(inv -> tagOf(inv.getArgument(0), inv.getArgument(1), inv.getArgument(2)));
+ Path xlsx = writeTagTree(tempDir, List.of(
+ new String[]{"Themen", "", "Themen"}));
+
+ new TagTreeImporter(tagService).load(xlsx.toFile());
+
+ verify(tagService).upsertBySourceRef("Themen", "Themen", null);
+ }
+
+ @Test
+ void load_resolvesParentByPath_forChildTag(@TempDir Path tempDir) throws Exception {
+ TagService tagService = mock(TagService.class);
+ UUID rootId = UUID.randomUUID();
+ when(tagService.upsertBySourceRef(eq("Themen"), eq("Themen"), isNull()))
+ .thenReturn(tagOf("Themen", "Themen", null, rootId));
+ when(tagService.upsertBySourceRef(eq("Themen/Brautbriefe"), eq("Brautbriefe"), eq(rootId)))
+ .thenReturn(tagOf("Themen/Brautbriefe", "Brautbriefe", rootId));
+ Path xlsx = writeTagTree(tempDir, List.of(
+ new String[]{"Themen", "", "Themen"},
+ new String[]{"Themen/Brautbriefe", "Themen", "Brautbriefe"}));
+
+ new TagTreeImporter(tagService).load(xlsx.toFile());
+
+ verify(tagService).upsertBySourceRef("Themen/Brautbriefe", "Brautbriefe", rootId);
+ }
+
+ @Test
+ void load_returnsCountOfProcessedRows(@TempDir Path tempDir) throws Exception {
+ TagService tagService = mock(TagService.class);
+ when(tagService.upsertBySourceRef(any(), any(), any()))
+ .thenAnswer(inv -> tagOf(inv.getArgument(0), inv.getArgument(1), inv.getArgument(2)));
+ Path xlsx = writeTagTree(tempDir, List.of(
+ new String[]{"Themen", "", "Themen"},
+ new String[]{"Themen/Brautbriefe", "Themen", "Brautbriefe"}));
+
+ int processed = new TagTreeImporter(tagService).load(xlsx.toFile());
+
+ assertThat(processed).isEqualTo(2);
+ }
+
+ private static Tag tagOf(String sourceRef, String name, UUID parentId) {
+ return tagOf(sourceRef, name, parentId, UUID.randomUUID());
+ }
+
+ private static Tag tagOf(String sourceRef, String name, UUID parentId, UUID id) {
+ return Tag.builder().id(id).sourceRef(sourceRef).name(name).parentId(parentId).build();
+ }
+
+ private Path writeTagTree(Path dir, List rows) throws Exception {
+ Path xlsx = dir.resolve("canonical-tag-tree.xlsx");
+ try (XSSFWorkbook wb = new XSSFWorkbook()) {
+ Sheet sheet = wb.createSheet("Sheet1");
+ Row header = sheet.createRow(0);
+ header.createCell(0).setCellValue("tag_path");
+ header.createCell(1).setCellValue("parent_name");
+ header.createCell(2).setCellValue("tag_name");
+ for (int r = 0; r < rows.size(); r++) {
+ Row row = sheet.createRow(r + 1);
+ String[] values = rows.get(r);
+ for (int c = 0; c < values.length; c++) {
+ row.createCell(c).setCellValue(values[c]);
+ }
+ }
+ try (OutputStream out = Files.newOutputStream(xlsx)) {
+ wb.write(out);
+ }
+ }
+ return xlsx;
+ }
+}
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonControllerTest.java b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonControllerTest.java
index e7767411..d43e9a9a 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonControllerTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonControllerTest.java
@@ -65,44 +65,144 @@ class PersonControllerTest {
@Test
@WithMockUser(authorities = "READ_ALL")
- void getPersons_returns200_withEmptyList() throws Exception {
- when(personService.findAll(null)).thenReturn(Collections.emptyList());
+ void getPersons_returns200_withEmptyPagedResult() throws Exception {
+ when(personService.search(any(), eq(0), eq(50), eq(null)))
+ .thenReturn(PersonSearchResult.paged(Collections.emptyList(), 0, 50, 0));
mockMvc.perform(get("/api/persons"))
- .andExpect(status().isOk());
+ .andExpect(status().isOk())
+ .andExpect(jsonPath("$.items").isArray())
+ .andExpect(jsonPath("$.totalElements").value(0));
}
@Test
@WithMockUser(authorities = "READ_ALL")
void getPersons_delegatesQueryParam_toService() throws Exception {
PersonSummaryDTO dto = mockPersonSummary("Hans", "Müller");
- when(personService.findAll("Hans")).thenReturn(List.of(dto));
+ when(personService.search(any(), eq(0), eq(50), eq("Hans")))
+ .thenReturn(PersonSearchResult.paged(List.of(dto), 0, 50, 1));
mockMvc.perform(get("/api/persons").param("q", "Hans"))
.andExpect(status().isOk())
- .andExpect(jsonPath("$[0].firstName").value("Hans"));
+ .andExpect(jsonPath("$.items[0].firstName").value("Hans"));
}
@Test
@WithMockUser(authorities = "READ_ALL")
- void getPersons_delegatesTopByDocumentCount_whenSortAndSizeGiven() throws Exception {
+ void getPersons_passesFilterParams_toService() throws Exception { // each query parameter must land on the matching PersonFilter field
+ ArgumentCaptor<PersonFilter> filterCaptor = ArgumentCaptor.forClass(PersonFilter.class);
+ when(personService.search(filterCaptor.capture(), eq(0), eq(50), eq(null)))
+ .thenReturn(PersonSearchResult.paged(Collections.emptyList(), 0, 50, 0));
+
+ mockMvc.perform(get("/api/persons")
+ .param("type", "INSTITUTION")
+ .param("familyOnly", "true")
+ .param("hasDocuments", "true")
+ .param("provisional", "false"))
+ .andExpect(status().isOk());
+
+ PersonFilter captured = filterCaptor.getValue(); // typed captor: no cast needed
+ assertThat(captured.type()).isEqualTo(PersonType.INSTITUTION);
+ assertThat(captured.familyOnly()).isTrue();
+ assertThat(captured.hasDocuments()).isTrue();
+ assertThat(captured.provisional()).isFalse();
+ }
+
+ @Test
+ @WithMockUser(authorities = "READ_ALL")
+ void getPersons_defaultsToReaderDefault_whenNoReviewFlag() throws Exception { // no "review" parameter → filter.readerDefault() is true
+ ArgumentCaptor<PersonFilter> filterCaptor = ArgumentCaptor.forClass(PersonFilter.class);
+ when(personService.search(filterCaptor.capture(), eq(0), eq(50), eq(null)))
+ .thenReturn(PersonSearchResult.paged(Collections.emptyList(), 0, 50, 0));
+
+ mockMvc.perform(get("/api/persons")).andExpect(status().isOk());
+
+ assertThat(filterCaptor.getValue().readerDefault()).isTrue();
+ }
+
+ @Test
+ @WithMockUser(authorities = "READ_ALL")
+ void getPersons_dropsReaderDefault_whenReviewFlagSet() throws Exception { // review=true → filter.readerDefault() is false
+ ArgumentCaptor<PersonFilter> filterCaptor = ArgumentCaptor.forClass(PersonFilter.class);
+ when(personService.search(filterCaptor.capture(), eq(0), eq(50), eq(null)))
+ .thenReturn(PersonSearchResult.paged(Collections.emptyList(), 0, 50, 0));
+
+ mockMvc.perform(get("/api/persons").param("review", "true")).andExpect(status().isOk());
+
+ assertThat(filterCaptor.getValue().readerDefault()).isFalse();
+ }
+
+ @Test
+ @WithMockUser(authorities = "READ_ALL")
+ void getPersons_passesPageAndSize_toService() throws Exception {
+ when(personService.search(any(), eq(2), eq(25), eq(null)))
+ .thenReturn(PersonSearchResult.paged(Collections.emptyList(), 2, 25, 0));
+
+ mockMvc.perform(get("/api/persons").param("page", "2").param("size", "25"))
+ .andExpect(status().isOk());
+
+ verify(personService).search(any(), eq(2), eq(25), eq(null));
+ }
+
+ @Test
+ @WithMockUser(authorities = "READ_ALL")
+ void getPersons_returns400_whenSizeIsZero() throws Exception {
+ mockMvc.perform(get("/api/persons").param("size", "0"))
+ .andExpect(status().isBadRequest());
+ }
+
+ @Test
+ @WithMockUser(authorities = "READ_ALL")
+ void getPersons_returns400_whenSizeExceeds100() throws Exception {
+ mockMvc.perform(get("/api/persons").param("size", "101"))
+ .andExpect(status().isBadRequest());
+ }
+
+ @Test
+ @WithMockUser(authorities = "READ_ALL")
+ void getPersons_returns400_whenPageIsNegative() throws Exception {
+ mockMvc.perform(get("/api/persons").param("page", "-1"))
+ .andExpect(status().isBadRequest());
+ }
+
+ @Test
+ @WithMockUser(authorities = "READ_ALL")
+ void getPersons_delegatesTopByDocumentCount_whenSortGiven() throws Exception {
PersonSummaryDTO top = mockPersonSummary("Käthe", "Raddatz");
when(personService.findTopByDocumentCount(4)).thenReturn(List.of(top));
mockMvc.perform(get("/api/persons").param("sort", "documentCount").param("size", "4"))
.andExpect(status().isOk())
- .andExpect(jsonPath("$[0].firstName").value("Käthe"));
+ .andExpect(jsonPath("$.items[0].firstName").value("Käthe"));
}
@Test
@WithMockUser(authorities = "READ_ALL")
- void getPersons_capsTopByDocumentCount_atFifty() throws Exception {
- ArgumentCaptor sizeCaptor = ArgumentCaptor.forClass(Integer.class);
- when(personService.findTopByDocumentCount(sizeCaptor.capture())).thenReturn(Collections.emptyList());
+ void getPersons_topByDocumentCount_isNonPaged_totalElementsEqualsReturnedCount() throws Exception {
+ // The top-N dashboard path is deliberately NON-paged: it returns the complete result
+ // (no further page exists), so totalElements equals the number of rows returned and
+ // totalPages is 1. Pinned so nobody "fixes" it into a misleading paged total.
+ when(personService.findTopByDocumentCount(50))
+ .thenReturn(List.of(mockPersonSummary("Käthe", "Raddatz"),
+ mockPersonSummary("Hans", "Müller")));
- mockMvc.perform(get("/api/persons").param("sort", "documentCount").param("size", "999"))
- .andExpect(status().isOk());
+ mockMvc.perform(get("/api/persons").param("sort", "documentCount"))
+ .andExpect(status().isOk())
+ .andExpect(jsonPath("$.items.length()").value(2))
+ .andExpect(jsonPath("$.totalElements").value(2))
+ .andExpect(jsonPath("$.pageNumber").value(0))
+ .andExpect(jsonPath("$.pageSize").value(2))
+ .andExpect(jsonPath("$.totalPages").value(1));
+ }
- assertThat(sizeCaptor.getValue()).isEqualTo(50);
+ @Test
+ @WithMockUser(authorities = "READ_ALL")
+ void getPersons_topByDocumentCount_emptyResult_reportsZeroPages() throws Exception {
+ when(personService.findTopByDocumentCount(50)).thenReturn(Collections.emptyList());
+
+ mockMvc.perform(get("/api/persons").param("sort", "documentCount"))
+ .andExpect(status().isOk())
+ .andExpect(jsonPath("$.totalElements").value(0))
+ .andExpect(jsonPath("$.totalPages").value(0));
}
private PersonSummaryDTO mockPersonSummary(String firstName, String lastName) {
@@ -117,6 +217,7 @@ class PersonControllerTest {
public Integer getDeathYear() { return null; }
public String getNotes() { return null; }
public boolean isFamilyMember() { return false; }
+ public boolean isProvisional() { return false; }
public long getDocumentCount() { return 0; }
};
}
@@ -397,6 +498,61 @@ class PersonControllerTest {
.andExpect(status().isNoContent());
}
+ // ─── PATCH /api/persons/{id}/confirm ──────────────────────────────────────
+
+ @Test
+ void confirmPerson_returns401_whenUnauthenticated() throws Exception {
+ mockMvc.perform(patch("/api/persons/{id}/confirm", UUID.randomUUID()).with(csrf()))
+ .andExpect(status().isUnauthorized());
+ }
+
+ @Test
+ @WithMockUser(authorities = "READ_ALL")
+ void confirmPerson_returns403_whenUserHasOnlyReadPermission() throws Exception {
+ mockMvc.perform(patch("/api/persons/{id}/confirm", UUID.randomUUID()).with(csrf()))
+ .andExpect(status().isForbidden());
+ }
+
+ @Test
+ @WithMockUser(authorities = "WRITE_ALL")
+ void confirmPerson_returns200_andClearsProvisional() throws Exception {
+ UUID id = UUID.randomUUID();
+ Person confirmed = Person.builder().id(id).firstName("Bald").lastName("Bestaetigt").provisional(false).build();
+ when(personService.confirmPerson(id)).thenReturn(confirmed);
+
+ mockMvc.perform(patch("/api/persons/{id}/confirm", id).with(csrf()))
+ .andExpect(status().isOk())
+ .andExpect(jsonPath("$.provisional").value(false));
+
+ verify(personService).confirmPerson(id);
+ }
+
+ // ─── DELETE /api/persons/{id} ──────────────────────────────────────────────
+
+ @Test
+ void deletePerson_returns401_whenUnauthenticated() throws Exception {
+ mockMvc.perform(delete("/api/persons/{id}", UUID.randomUUID()).with(csrf()))
+ .andExpect(status().isUnauthorized());
+ }
+
+ @Test
+ @WithMockUser(authorities = "READ_ALL")
+ void deletePerson_returns403_whenUserHasOnlyReadPermission() throws Exception {
+ mockMvc.perform(delete("/api/persons/{id}", UUID.randomUUID()).with(csrf()))
+ .andExpect(status().isForbidden());
+ }
+
+ @Test
+ @WithMockUser(authorities = "WRITE_ALL")
+ void deletePerson_returns204_whenValid() throws Exception {
+ UUID id = UUID.randomUUID();
+
+ mockMvc.perform(delete("/api/persons/{id}", id).with(csrf()))
+ .andExpect(status().isNoContent());
+
+ verify(personService).deletePerson(id);
+ }
+
// ─── PUT /api/persons/{id} — lastName blank branch ────────────────────────
@Test
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonImportUpsertTest.java b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonImportUpsertTest.java
new file mode 100644
index 00000000..c8b81b2b
--- /dev/null
+++ b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonImportUpsertTest.java
@@ -0,0 +1,151 @@
+package org.raddatz.familienarchiv.person;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.InjectMocks;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+
+import java.util.Optional;
+import java.util.UUID;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.argThat;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+@ExtendWith(MockitoExtension.class)
+class PersonImportUpsertTest { // unit tests for PersonService.upsertBySourceRef with mocked repositories
+
+ @Mock PersonRepository personRepository;
+ @Mock PersonNameAliasRepository aliasRepository; // not stubbed in these tests; presumably a PersonService dependency — confirm
+ @InjectMocks PersonService personService;
+
+ @Test
+ void upsertBySourceRef_insertsNewPerson_whenSourceRefUnknown() {
+ when(personRepository.findBySourceRef("clara-cram")).thenReturn(Optional.empty());
+ when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0)); // save echoes its argument so the result can be inspected
+
+ PersonUpsertCommand cmd = PersonUpsertCommand.builder()
+ .sourceRef("clara-cram").firstName("Clara").lastName("Cram")
+ .personType(PersonType.PERSON).provisional(false).build();
+
+ Person result = personService.upsertBySourceRef(cmd);
+
+ assertThat(result.getSourceRef()).isEqualTo("clara-cram");
+ assertThat(result.getFirstName()).isEqualTo("Clara");
+ assertThat(result.getLastName()).isEqualTo("Cram");
+ assertThat(result.isProvisional()).isFalse();
+ }
+
+ @Test
+ void upsertBySourceRef_updatesInPlace_whenSourceRefExists() {
+ Person existing = Person.builder()
+ .id(UUID.randomUUID()).sourceRef("clara-cram")
+ .firstName("Clara").lastName("Cram").build();
+ when(personRepository.findBySourceRef("clara-cram")).thenReturn(Optional.of(existing));
+ when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
+
+ PersonUpsertCommand cmd = PersonUpsertCommand.builder()
+ .sourceRef("clara-cram").firstName("Clara").lastName("Cram")
+ .notes("Updated note").personType(PersonType.PERSON).provisional(false).build();
+
+ personService.upsertBySourceRef(cmd);
+
+ verify(personRepository).save(argThat(p -> p.getId().equals(existing.getId()))); // the existing row is saved again under its own id
+ verify(personRepository, never()).save(argThat(p -> p.getId() == null)); // and no id-less (new) entity is saved alongside it
+ }
+
+ @Test
+ void upsertBySourceRef_preservesHumanEditedNonBlankFields() {
+ // A human renamed the maiden-name register person and added notes in-app.
+ Person humanEdited = Person.builder()
+ .id(UUID.randomUUID()).sourceRef("clara-cram")
+ .firstName("Klara").lastName("Cram-Müller").notes("Verified by Marcel").build();
+ when(personRepository.findBySourceRef("clara-cram")).thenReturn(Optional.of(humanEdited));
+ when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
+
+ PersonUpsertCommand cmd = PersonUpsertCommand.builder()
+ .sourceRef("clara-cram").firstName("Clara").lastName("Cram")
+ .notes("Auto note").personType(PersonType.PERSON).provisional(false).build();
+
+ Person result = personService.upsertBySourceRef(cmd);
+
+ // Human edits survive the re-import: none of the three non-blank fields takes the command's value.
+ assertThat(result.getFirstName()).isEqualTo("Klara");
+ assertThat(result.getLastName()).isEqualTo("Cram-Müller");
+ assertThat(result.getNotes()).isEqualTo("Verified by Marcel");
+ }
+
+ @Test
+ void upsertBySourceRef_fillsOnlyBlankFields_onReimport() {
+ Person existing = Person.builder()
+ .id(UUID.randomUUID()).sourceRef("clara-cram")
+ .firstName("Clara").lastName("Cram").notes(null).build(); // notes is the only blank field
+ when(personRepository.findBySourceRef("clara-cram")).thenReturn(Optional.of(existing));
+ when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
+
+ PersonUpsertCommand cmd = PersonUpsertCommand.builder()
+ .sourceRef("clara-cram").firstName("Clara").lastName("Cram")
+ .notes("Nichte von Herbert").personType(PersonType.PERSON).provisional(false).build();
+
+ Person result = personService.upsertBySourceRef(cmd);
+
+ // Blank field gets filled by canonical value.
+ assertThat(result.getNotes()).isEqualTo("Nichte von Herbert");
+ }
+
+ @Test
+ void upsertBySourceRef_fillsBlankYears_butPreservesHumanEditedYears_onReimport() {
+ // Existing has a human-set birthYear and a blank deathYear.
+ Person existing = Person.builder()
+ .id(UUID.randomUUID()).sourceRef("clara-cram")
+ .lastName("Cram").birthYear(1890).deathYear(null).build();
+ when(personRepository.findBySourceRef("clara-cram")).thenReturn(Optional.of(existing));
+ when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
+
+ PersonUpsertCommand cmd = PersonUpsertCommand.builder()
+ .sourceRef("clara-cram").lastName("Cram")
+ .birthYear(1888).deathYear(1965)
+ .personType(PersonType.PERSON).provisional(false).build();
+
+ Person result = personService.upsertBySourceRef(cmd);
+
+ assertThat(result.getBirthYear()).isEqualTo(1890); // human value kept
+ assertThat(result.getDeathYear()).isEqualTo(1965); // blank filled from canonical
+ }
+
+ @Test
+ void upsertBySourceRef_neverFlipsProvisionalBackToTrue_onceHumanConfirmed() {
+ // A human confirmed this provisional importer-created person (provisional -> false).
+ Person confirmed = Person.builder()
+ .id(UUID.randomUUID()).sourceRef("schwester-hanni")
+ .firstName(null).lastName("Schwester Hanni").provisional(false).build();
+ when(personRepository.findBySourceRef("schwester-hanni")).thenReturn(Optional.of(confirmed));
+ when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
+
+ PersonUpsertCommand cmd = PersonUpsertCommand.builder()
+ .sourceRef("schwester-hanni").lastName("Schwester Hanni")
+ .personType(PersonType.PERSON).provisional(true).build(); // the re-import still claims provisional=true
+
+ Person result = personService.upsertBySourceRef(cmd);
+
+ assertThat(result.isProvisional()).isFalse(); // the stored false wins over the command's true
+ }
+
+ @Test
+ void upsertBySourceRef_setsProvisionalTrue_forNewProvisionalPerson() {
+ when(personRepository.findBySourceRef("noise-geschirr")).thenReturn(Optional.empty()); // unknown sourceRef → insert path
+ when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
+
+ PersonUpsertCommand cmd = PersonUpsertCommand.builder()
+ .sourceRef("noise-geschirr").lastName("Tante Tüten")
+ .personType(PersonType.PERSON).provisional(true).build();
+
+ Person result = personService.upsertBySourceRef(cmd);
+
+ assertThat(result.isProvisional()).isTrue();
+ }
+}
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonRepositoryTest.java b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonRepositoryTest.java
index 8ccf27ba..910e701e 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonRepositoryTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonRepositoryTest.java
@@ -463,4 +463,213 @@ class PersonRepositoryTest {
assertThat(result).hasSize(1);
assertThat(result.get(0).getLastName()).isEqualTo("Gesellschafter des Verlages");
}
+
+ // ─── #671: provisional must be SELECTed in all three native projections ───
+ // Adding isProvisional() to the interface compiles even if a native query forgets
+ // to SELECT p.provisional — it then silently returns false. These tests are the only
+ // guard against that trap, so they must run against real Postgres.
+
+ @Test
+ void findAllWithDocumentCount_projectsProvisionalTrue() { // guards that the projection carries the provisional flag
+ personRepository.save(Person.builder()
+ .firstName("Inferred").lastName("Person").provisional(true).build());
+
+ List<PersonSummaryDTO> result = personRepository.findAllWithDocumentCount();
+
+ assertThat(result).anyMatch(PersonSummaryDTO::isProvisional);
+ }
+
+ @Test
+ void searchWithDocumentCount_projectsProvisionalTrue() { // same guard for the name-search projection
+ personRepository.save(Person.builder()
+ .firstName("Provisorisch").lastName("Müller").provisional(true).build());
+
+ List<PersonSummaryDTO> result = personRepository.searchWithDocumentCount("Provisorisch");
+
+ assertThat(result).hasSize(1);
+ assertThat(result.get(0).isProvisional()).isTrue();
+ }
+
+ @Test
+ void findTopByDocumentCount_projectsProvisionalTrue() { // same guard for the top-N projection
+ Person provisional = personRepository.save(Person.builder()
+ .firstName("Top").lastName("Provisional").provisional(true).build());
+ documentRepository.save(Document.builder()
+ .title("Brief").originalFilename("b.pdf")
+ .status(DocumentStatus.UPLOADED)
+ .sender(provisional).build()); // give the person one sent document
+
+ List<PersonSummaryDTO> result = personRepository.findTopByDocumentCount(10);
+
+ PersonSummaryDTO summary = result.stream()
+ .filter(p -> p.getId().equals(provisional.getId())).findFirst().orElseThrow(); // fails if the person is missing from the top list
+ assertThat(summary.isProvisional()).isTrue();
+ }
+
+ // ─── #667: filter-aware paged slice + paired COUNT (Postgres-only) ────────
+ // The slice query (findByFilter) and the count query (countByFilter) MUST share one
+ // WHERE clause so totalElements can never drift from the rendered page. These tests run
+ // against real Postgres because the slice ORDER BY uses a computed alias that fails on H2.
+
+ private void seedDirectoryFixture() {
+ // Register family member, no documents — visible by reader default (familyMember)
+ personRepository.save(Person.builder().firstName("Karl").lastName("Register").familyMember(true).build());
+ // Person with one document — visible by reader default (documentCount > 0)
+ Person hasDoc = personRepository.save(Person.builder().firstName("Doku").lastName("Person").build());
+ documentRepository.save(Document.builder().title("B").originalFilename("b.pdf")
+ .status(DocumentStatus.UPLOADED).sender(hasDoc).build());
+ // Provisional, zero-document, non-family — hidden by reader default
+ personRepository.save(Person.builder().firstName("Unbe").lastName("Staetigt").provisional(true).build());
+ // An institution with no documents, non-family, non-provisional
+ personRepository.save(Person.builder().lastName("Verlag GmbH").personType(PersonType.INSTITUTION).build());
+ }
+
+ @Test
+ void findByFilter_readerDefault_returnsOnlyFamilyOrWithDocuments() { // fifth argument true = reader-default visibility
+ seedDirectoryFixture();
+
+ List<PersonSummaryDTO> slice = personRepository.findByFilter(
+ null, null, null, null, true, null, 50, 0);
+
+ assertThat(slice).extracting(PersonSummaryDTO::getLastName)
+ .containsExactlyInAnyOrder("Register", "Person"); // the family member and the person with a document
+ }
+
+ @Test
+ void countByFilter_readerDefault_matchesSliceSize() {
+ seedDirectoryFixture();
+
+ long count = personRepository.countByFilter(null, null, null, null, true, null);
+
+ assertThat(count).isEqualTo(2);
+ }
+
+ @Test
+ void findByFilter_showAll_returnsEveryone() { // no filters and reader default off
+ seedDirectoryFixture();
+
+ List<PersonSummaryDTO> slice = personRepository.findByFilter(
+ null, null, null, null, false, null, 50, 0);
+
+ assertThat(slice).hasSize(4); // all four rows created by seedDirectoryFixture
+ }
+
+ @Test
+ void findByFilter_typeInstitution_returnsOnlyInstitutions() { // first argument is the person type, passed as its enum name
+ seedDirectoryFixture();
+
+ List<PersonSummaryDTO> slice = personRepository.findByFilter(
+ "INSTITUTION", null, null, null, false, null, 50, 0);
+
+ assertThat(slice).extracting(PersonSummaryDTO::getLastName).containsExactly("Verlag GmbH");
+ }
+
+ @Test
+ void findByFilter_familyOnly_returnsOnlyFamilyMembers() { // second argument true = family members only
+ seedDirectoryFixture();
+
+ List<PersonSummaryDTO> slice = personRepository.findByFilter(
+ null, true, null, null, false, null, 50, 0);
+
+ assertThat(slice).extracting(PersonSummaryDTO::getLastName).containsExactly("Register");
+ }
+
+ @Test
+ void findByFilter_hasDocuments_returnsOnlyPersonsWithDocuments() { // third argument true = must have documents
+ seedDirectoryFixture();
+
+ List<PersonSummaryDTO> slice = personRepository.findByFilter(
+ null, null, true, null, false, null, 50, 0);
+
+ assertThat(slice).extracting(PersonSummaryDTO::getLastName).containsExactly("Person");
+ }
+
+ @Test
+ void findByFilter_provisionalTrue_returnsOnlyProvisional() { // fourth argument true = provisional persons only
+ seedDirectoryFixture();
+
+ List<PersonSummaryDTO> slice = personRepository.findByFilter(
+ null, null, null, true, false, null, 50, 0);
+
+ assertThat(slice).extracting(PersonSummaryDTO::getLastName).containsExactly("Staetigt");
+ }
+
+ @Test
+ void findByFilter_combinedFilters_andTogether() { // several filters narrow the result jointly (AND)
+ seedDirectoryFixture();
+ // family + has-documents → intersection is empty (Register has no docs, Doku is not family)
+ List<PersonSummaryDTO> slice = personRepository.findByFilter(
+ null, true, true, null, false, null, 50, 0);
+
+ assertThat(slice).isEmpty();
+ }
+
+ @Test
+ void findByFilter_query_combinesWithFilters() { // sixth argument is the free-text query
+ seedDirectoryFixture();
+
+ List<PersonSummaryDTO> slice = personRepository.findByFilter(
+ null, null, null, null, false, "Verlag", 50, 0);
+
+ assertThat(slice).extracting(PersonSummaryDTO::getLastName).containsExactly("Verlag GmbH");
+ }
+
+ @Test
+ void findByFilter_pageBeyondRange_returnsEmptySlice() {
+ seedDirectoryFixture();
+
+ List slice = personRepository.findByFilter(
+ null, null, null, null, false, null, 50, 999 * 50);
+
+ assertThat(slice).isEmpty();
+ }
+
+ @Test
+ void findByFilter_respectsPageSize() { // two slices of size 2 over the four fixture rows must not overlap
+ seedDirectoryFixture();
+
+ List<PersonSummaryDTO> firstPage = personRepository.findByFilter(
+ null, null, null, null, false, null, 2, 0);
+ List<PersonSummaryDTO> secondPage = personRepository.findByFilter(
+ null, null, null, null, false, null, 2, 2);
+
+ assertThat(firstPage).hasSize(2);
+ assertThat(secondPage).hasSize(2);
+ assertThat(firstPage).extracting(PersonSummaryDTO::getId)
+ .doesNotContainAnyElementsOf(secondPage.stream().map(PersonSummaryDTO::getId).toList()); // typed lists let the method reference resolve
+ }
+
+ @Test
+ void countByFilter_typeInstitution_matchesSlice() {
+ seedDirectoryFixture();
+
+ long count = personRepository.countByFilter("INSTITUTION", null, null, null, false, null);
+
+ assertThat(count).isEqualTo(1);
+ }
+
+ @Test
+ void countByFilter_query_matchesSliceSize() {
+ // The whole point of the shared FILTER_WHERE is that the slice and the count can never
+ // drift. Pin the query (LIKE) path explicitly: countByFilter must equal the slice size
+ // so a future edit to one query's LIKE clause is caught.
+ seedDirectoryFixture();
+
+ List<PersonSummaryDTO> slice = personRepository.findByFilter(
+ null, null, null, null, false, "Verlag", 50, 0);
+ long count = personRepository.countByFilter(null, null, null, null, false, "Verlag"); // same filter arguments, minus the paging pair
+
+ assertThat(count).isEqualTo(slice.size());
+ assertThat(count).isEqualTo(1);
+ }
+
+ @Test
+ void findByFilter_projectsDocumentCount() { // the slice projection must carry the per-person document count
+ seedDirectoryFixture();
+
+ List<PersonSummaryDTO> slice = personRepository.findByFilter(
+ null, null, true, null, false, null, 50, 0);
+
+ assertThat(slice.get(0).getDocumentCount()).isEqualTo(1); // the fixture's only person with a document has exactly one
+ }
}
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonServiceIntegrationTest.java b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonServiceIntegrationTest.java
index e8d5ed97..0578f5fb 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonServiceIntegrationTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonServiceIntegrationTest.java
@@ -2,6 +2,9 @@ package org.raddatz.familienarchiv.person;
import org.junit.jupiter.api.Test;
import org.raddatz.familienarchiv.PostgresContainerConfig;
+import org.raddatz.familienarchiv.document.Document;
+import org.raddatz.familienarchiv.document.DocumentRepository;
+import org.raddatz.familienarchiv.document.DocumentStatus;
import org.raddatz.familienarchiv.person.Person;
import org.raddatz.familienarchiv.person.PersonType;
import org.raddatz.familienarchiv.person.PersonRepository;
@@ -13,6 +16,11 @@ import org.springframework.test.context.bean.override.mockito.MockitoBean;
import org.springframework.transaction.annotation.Transactional;
import software.amazon.awssdk.services.s3.S3Client;
+import jakarta.persistence.EntityManager;
+import jakarta.persistence.PersistenceContext;
+
+import java.util.Set;
+
import static org.assertj.core.api.Assertions.assertThat;
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
@@ -24,6 +32,9 @@ class PersonServiceIntegrationTest {
@MockitoBean S3Client s3Client;
@Autowired PersonService personService;
@Autowired PersonRepository personRepository;
+ @Autowired DocumentRepository documentRepository;
+
+ @PersistenceContext EntityManager entityManager;
@Test
void findOrCreateByAlias_skipReturnsNull_noRecordCreated() {
@@ -63,4 +74,97 @@ class PersonServiceIntegrationTest {
assertThat(result.getFirstName()).isEqualTo("Clara");
assertThat(result.getLastName()).isEqualTo("Cram");
}
+
+ // ─── #667: confirm round-trip + reader-default semantics ──────────────────
+
+ @Test
+ void search_readerDefault_hidesProvisionalZeroDocumentPerson() {
+ personRepository.save(Person.builder()
+ .firstName("Unbe").lastName("Staetigt").provisional(true).build());
+
+ PersonSearchResult result = personService.search(PersonFilter.cleanDefault(), 0, 50, null);
+
+ assertThat(result.items()).noneMatch(p -> p.getLastName().equals("Staetigt"));
+ assertThat(result.totalElements()).isEqualTo(result.items().size());
+ }
+
+ @Test
+ void search_showAll_includesProvisionalZeroDocumentPerson() {
+ personRepository.save(Person.builder()
+ .firstName("Unbe").lastName("Staetigt").provisional(true).build());
+
+ PersonSearchResult result = personService.search(PersonFilter.showAll(), 0, 50, null);
+
+ assertThat(result.items()).anyMatch(p -> p.getLastName().equals("Staetigt"));
+ }
+
+ @Test
+ void confirmPerson_clearsProvisional_andShowAllTreatsItAsConfirmed() {
+ Person provisional = personRepository.save(Person.builder()
+ .firstName("Bald").lastName("Bestaetigt").provisional(true).build());
+
+ personService.confirmPerson(provisional.getId());
+
+ Person reloaded = personRepository.findById(provisional.getId()).orElseThrow();
+ assertThat(reloaded.isProvisional()).isFalse();
+
+ PersonSearchResult showAll = personService.search(PersonFilter.showAll(), 0, 50, null);
+ assertThat(showAll.items())
+ .filteredOn(p -> p.getId().equals(provisional.getId()))
+ .allMatch(p -> !p.isProvisional());
+ }
+
+ @Test
+ void deletePerson_removesPerson() {
+ Person target = personRepository.save(Person.builder()
+ .firstName("Weg").lastName("Person").provisional(true).build());
+
+ personService.deletePerson(target.getId());
+
+ assertThat(personRepository.findById(target.getId())).isEmpty();
+ }
+
+ @Test
+ void deletePerson_detachesSentAndReceivedReferences_beforeDelete_noOrphan() {
+ // A person referenced as BOTH a document sender and a document receiver must delete
+ // cleanly: deletePerson nulls the sender_id FK and removes the receiver join row first
+ // (reassignSenderToNull → deleteReceiverReferences → deleteById), so no FK orphan and
+ // the documents themselves survive.
+ Person target = personRepository.save(Person.builder()
+ .firstName("Weg").lastName("Person").provisional(true).build());
+ Person bystander = personRepository.save(Person.builder()
+ .firstName("Bleibt").lastName("Hier").build());
+
+ Document sent = documentRepository.save(Document.builder()
+ .title("Sent letter").originalFilename("sent.pdf")
+ .status(DocumentStatus.UPLOADED).sender(target).build());
+ Document received = documentRepository.save(Document.builder()
+ .title("Received letter").originalFilename("received.pdf")
+ .status(DocumentStatus.UPLOADED).sender(bystander)
+ .receivers(new java.util.HashSet<>(Set.of(target))).build());
+
+ // Persist the fixture and detach everything so the native @Modifying deletes operate on
+ // the database directly without the persistence context holding stale references that
+ // would re-flush a now-deleted person as a transient association.
+ entityManager.flush();
+ entityManager.clear();
+
+ personService.deletePerson(target.getId());
+
+ // Native @Modifying queries bypass the persistence context — clear it so the asserting
+ // reads observe the post-delete database state, not stale managed entities.
+ entityManager.flush();
+ entityManager.clear();
+
+ assertThat(personRepository.findById(target.getId())).isEmpty();
+
+ Document reloadedSent = documentRepository.findById(sent.getId()).orElseThrow();
+ assertThat(reloadedSent.getSender()).isNull();
+
+ Document reloadedReceived = documentRepository.findById(received.getId()).orElseThrow();
+ assertThat(reloadedReceived.getReceivers())
+ .noneMatch(p -> p.getId().equals(target.getId()));
+ // The other person and the documents themselves survive the delete.
+ assertThat(personRepository.findById(bystander.getId())).isPresent();
+ }
}
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonServiceTest.java
index 1ad9ce27..4c8de65c 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonServiceTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonServiceTest.java
@@ -58,33 +58,109 @@ class PersonServiceTest {
assertThat(personService.getById(id)).isEqualTo(person);
}
- // ─── findAll ─────────────────────────────────────────────────────────────
+ // ─── #667: search (filter + pagination) ──────────────────────────────────
@Test
- void findAll_returnsAll_whenQueryIsNull() {
- List expected = List.of();
- when(personRepository.findAllWithDocumentCount()).thenReturn(expected);
+ void search_returnsPagedResult_withTotalsFromCountQuery() {
+ PersonFilter filter = PersonFilter.cleanDefault();
+ when(personRepository.countByFilter(null, null, null, null, true, null)).thenReturn(120L);
+ when(personRepository.findByFilter(null, null, null, null, true, null, 50, 0))
+ .thenReturn(List.of());
- assertThat(personService.findAll(null)).isEqualTo(expected);
- verify(personRepository).findAllWithDocumentCount();
- verify(personRepository, never()).searchWithDocumentCount(any());
+ PersonSearchResult result = personService.search(filter, 0, 50, null);
+
+ assertThat(result.totalElements()).isEqualTo(120L);
+ assertThat(result.pageNumber()).isEqualTo(0);
+ assertThat(result.pageSize()).isEqualTo(50);
+ assertThat(result.totalPages()).isEqualTo(3); // ceil(120 / 50)
}
@Test
- void findAll_returnsEmpty_whenQueryIsWhitespaceOnly() {
- assertThat(personService.findAll(" ")).isEmpty();
- verify(personRepository, never()).findAllWithDocumentCount();
- verify(personRepository, never()).searchWithDocumentCount(any());
+ void search_passesTypeAsEnumName_toRepository() {
+ PersonFilter filter = PersonFilter.builder().type(PersonType.INSTITUTION).build();
+ when(personRepository.countByFilter("INSTITUTION", null, null, null, false, null)).thenReturn(0L);
+ when(personRepository.findByFilter("INSTITUTION", null, null, null, false, null, 50, 0))
+ .thenReturn(List.of());
+
+ personService.search(filter, 0, 50, null);
+
+ verify(personRepository).findByFilter("INSTITUTION", null, null, null, false, null, 50, 0);
}
@Test
- void findAll_searchesByName_whenQueryIsNonBlank() {
- List expected = List.of();
- when(personRepository.searchWithDocumentCount("Anna")).thenReturn(expected);
+ void search_computesOffset_fromPageAndSize() {
+ PersonFilter filter = PersonFilter.showAll();
+ when(personRepository.countByFilter(null, null, null, null, false, null)).thenReturn(0L);
+ when(personRepository.findByFilter(null, null, null, null, false, null, 20, 40))
+ .thenReturn(List.of());
- assertThat(personService.findAll("Anna")).isEqualTo(expected);
- verify(personRepository).searchWithDocumentCount("Anna");
- verify(personRepository, never()).findAllWithDocumentCount();
+ personService.search(filter, 2, 20, null); // offset = page * size = 40
+
+ verify(personRepository).findByFilter(null, null, null, null, false, null, 20, 40);
+ }
+
+ @Test
+ void search_trimsBlankQueryToNull() {
+ // Stub both repository queries for the all-null filter arguments the service should send.
+ when(personRepository.findByFilter(null, null, null, null, false, null, 50, 0))
+ .thenReturn(List.of());
+ when(personRepository.countByFilter(null, null, null, null, false, null)).thenReturn(0L);
+ PersonFilter showAllFilter = PersonFilter.showAll();
+
+ // Whitespace-only query text goes in; the page lookup must still match the stubbed call.
+ personService.search(showAllFilter, 0, 50, " ");
+ verify(personRepository).findByFilter(null, null, null, null, false, null, 50, 0);
+ }
+
+ // ─── #667: confirmPerson ──────────────────────────────────────────────────
+
+ @Test
+ void confirmPerson_clearsProvisionalFlag() {
+ UUID personId = UUID.randomUUID();
+ Person inferred = Person.builder()
+ .id(personId).firstName("Inferred").lastName("Person").provisional(true).build();
+ // save(...) echoes its argument, so whatever the service passes in comes straight back.
+ when(personRepository.save(any())).thenAnswer(call -> call.getArgument(0));
+ when(personRepository.findById(personId)).thenReturn(Optional.of(inferred));
+ Person confirmed = personService.confirmPerson(personId);
+
+ assertThat(confirmed.isProvisional()).isFalse();
+ verify(personRepository).save(argThat(saved -> !saved.isProvisional()));
+ }
+
+ @Test
+ void confirmPerson_throwsNotFound_whenMissing() {
+ UUID unknownId = UUID.randomUUID();
+ // The repository lookup yields nothing for this id.
+ when(personRepository.findById(unknownId)).thenReturn(Optional.empty());
+ assertThatThrownBy(() -> personService.confirmPerson(unknownId))
+ .isInstanceOf(DomainException.class)
+ .extracting(thrown -> ((DomainException) thrown).getStatus().value())
+ .isEqualTo(404);
+ }
+
+ // ─── #667: deletePerson ───────────────────────────────────────────────────
+
+ @Test
+ void deletePerson_deletes_whenPersonExists() {
+ UUID doomedId = UUID.randomUUID();
+ Person doomed = Person.builder().id(doomedId).firstName("Weg").lastName("Person").build();
+ when(personRepository.findById(doomedId)).thenReturn(Optional.of(doomed));
+ personService.deletePerson(doomedId);
+
+ // Each repository call is verified on its own; their relative order is not asserted here.
+ verify(personRepository).deleteById(doomedId);
+ verify(personRepository).deleteReceiverReferences(doomedId);
+ verify(personRepository).reassignSenderToNull(doomedId);
+ }
+
+ @Test
+ void deletePerson_throwsNotFound_whenMissing() {
+ UUID unknownId = UUID.randomUUID();
+ // The repository lookup yields nothing for this id.
+ when(personRepository.findById(unknownId)).thenReturn(Optional.empty());
+ assertThatThrownBy(() -> personService.deletePerson(unknownId))
+ .isInstanceOf(DomainException.class)
+ .extracting(thrown -> ((DomainException) thrown).getStatus().value())
+ .isEqualTo(404);
}
// ─── createPerson ─────────────────────────────────────────────────────────
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/person/relationship/RelationshipServiceIntegrationTest.java b/backend/src/test/java/org/raddatz/familienarchiv/person/relationship/RelationshipServiceIntegrationTest.java
index a2d4a5f2..acbb3825 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/person/relationship/RelationshipServiceIntegrationTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/person/relationship/RelationshipServiceIntegrationTest.java
@@ -144,10 +144,12 @@ class RelationshipServiceIntegrationTest {
@Test
void setFamilyMember_true_makes_person_appear_in_network() {
- // charlie starts with familyMember = false. Add a PARENT_OF edge alice→charlie
- // so the edge exists, then flip charlie's flag and verify he appears in nodes.
+ // addRelationship side-effects family_member=true on both endpoints for family-graph
+ // edges (PARENT_OF/SPOUSE_OF/SIBLING_OF). Reset charlie so the explicit
+ // setFamilyMember(true) call below is the thing under test, not the auto-flip.
relationshipService.addRelationship(alice.getId(),
new CreateRelationshipRequest(charlie.getId(), RelationType.PARENT_OF, null, null, null));
+ relationshipService.setFamilyMember(charlie.getId(), false);
NetworkDTO before = relationshipService.getFamilyNetwork();
assertThat(before.nodes()).extracting("id").doesNotContain(charlie.getId());
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/person/relationship/RelationshipServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/person/relationship/RelationshipServiceTest.java
index 0a0b963c..c8d1faf6 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/person/relationship/RelationshipServiceTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/person/relationship/RelationshipServiceTest.java
@@ -23,6 +23,8 @@ import java.util.UUID;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyBoolean;
+import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
@@ -148,6 +150,50 @@ class RelationshipServiceTest {
assertThat(result.notes()).isEqualTo("first born");
}
+ @Test
+ void addRelationship_marks_both_endpoints_as_family_member_when_type_is_family() {
+ // PARENT_OF is a family-graph edge type (like SPOUSE_OF / SIBLING_OF): adding such an edge
+ // has to flag both people as family members, otherwise they stay out of
+ // findAllFamilyMembers and the network (this is how imported relationships reach the UI).
+ UUID aliceId = alice.getId();
+ UUID bobId = bob.getId();
+ when(relationshipRepository.saveAndFlush(any())).thenAnswer(call -> {
+ PersonRelationship persisted = call.getArgument(0);
+ persisted.setCreatedAt(Instant.now());
+ persisted.setId(UUID.randomUUID());
+ return persisted;
+ });
+ when(relationshipRepository.existsByPersonIdAndRelatedPersonIdAndRelationType(
+ bobId, aliceId, RelationType.PARENT_OF)).thenReturn(false);
+ when(personService.getById(bobId)).thenReturn(bob);
+ when(personService.getById(aliceId)).thenReturn(alice);
+ service.addRelationship(aliceId,
+ new CreateRelationshipRequest(bobId, RelationType.PARENT_OF, null, null, null));
+ verify(personService).setFamilyMember(bobId, true);
+ verify(personService).setFamilyMember(aliceId, true);
+ }
+
+ @Test
+ void addRelationship_does_not_flip_family_member_for_non_family_type() {
+ // FRIEND is outside the family graph (as are COLLEAGUE / EMPLOYER / DOCTOR / NEIGHBOR /
+ // OTHER — see getFamilyNetwork's filter), so adding such an edge must leave family_member
+ // untouched: a doctor of the family is not a family member.
+ UUID aliceId = alice.getId();
+ UUID bobId = bob.getId();
+ when(relationshipRepository.saveAndFlush(any())).thenAnswer(call -> {
+ PersonRelationship persisted = call.getArgument(0);
+ persisted.setCreatedAt(Instant.now());
+ persisted.setId(UUID.randomUUID());
+ return persisted;
+ });
+ when(personService.getById(bobId)).thenReturn(bob);
+ when(personService.getById(aliceId)).thenReturn(alice);
+ service.addRelationship(aliceId,
+ new CreateRelationshipRequest(bobId, RelationType.FRIEND, null, null, null));
+ verify(personService, never()).setFamilyMember(eq(bobId), anyBoolean());
+ verify(personService, never()).setFamilyMember(eq(aliceId), anyBoolean());
+ }
+
@Test
void deleteRelationship_succeeds_when_viewpoint_is_object() {
UUID relId = UUID.randomUUID();
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/tag/TagImportUpsertTest.java b/backend/src/test/java/org/raddatz/familienarchiv/tag/TagImportUpsertTest.java
new file mode 100644
index 00000000..c2e29dc0
--- /dev/null
+++ b/backend/src/test/java/org/raddatz/familienarchiv/tag/TagImportUpsertTest.java
@@ -0,0 +1,62 @@
+package org.raddatz.familienarchiv.tag;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.InjectMocks;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+
+import java.util.Optional;
+import java.util.UUID;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.argThat;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+@ExtendWith(MockitoExtension.class)
+class TagImportUpsertTest {
+ // Unit tests for TagService.upsertBySourceRef, run against a mocked TagRepository.
+ @Mock TagRepository tagRepository;
+ @InjectMocks TagService tagService;
+
+ @Test
+ void upsertBySourceRef_insertsNewTag_whenSourceRefUnknown() {
+ when(tagRepository.findBySourceRef("Themen/Brautbriefe")).thenReturn(Optional.empty());
+ when(tagRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
+ // save(...) echoes its argument, so the returned Tag is exactly what the service built.
+ UUID parentId = UUID.randomUUID();
+ Tag result = tagService.upsertBySourceRef("Themen/Brautbriefe", "Brautbriefe", parentId);
+
+ assertThat(result.getSourceRef()).isEqualTo("Themen/Brautbriefe");
+ assertThat(result.getName()).isEqualTo("Brautbriefe");
+ assertThat(result.getParentId()).isEqualTo(parentId);
+ }
+
+ @Test
+ void upsertBySourceRef_updatesInPlace_whenSourceRefExists() {
+ Tag existing = Tag.builder().id(UUID.randomUUID()).name("Brautbriefe")
+ .sourceRef("Themen/Brautbriefe").build();
+ when(tagRepository.findBySourceRef("Themen/Brautbriefe")).thenReturn(Optional.of(existing));
+ when(tagRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
+
+ tagService.upsertBySourceRef("Themen/Brautbriefe", "Brautbriefe", null);
+ // Compare from the known non-null id: a saved Tag with a null id must fail verification, not NPE.
+ verify(tagRepository).save(argThat(t -> existing.getId().equals(t.getId())));
+ verify(tagRepository, never()).save(argThat(t -> t.getId() == null));
+ }
+
+ @Test
+ void upsertBySourceRef_preservesHumanRenamedTag_onReimport() {
+ Tag humanRenamed = Tag.builder().id(UUID.randomUUID()).name("Verlobungsbriefe")
+ .sourceRef("Themen/Brautbriefe").build();
+ when(tagRepository.findBySourceRef("Themen/Brautbriefe")).thenReturn(Optional.of(humanRenamed));
+ when(tagRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
+ // Re-import offers the name "Brautbriefe" for a tag whose stored name differs.
+ Tag result = tagService.upsertBySourceRef("Themen/Brautbriefe", "Brautbriefe", null);
+ // The stored name must win over the name offered by the import.
+ assertThat(result.getName()).isEqualTo("Verlobungsbriefe");
+ }
+}
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/user/AdminControllerTest.java b/backend/src/test/java/org/raddatz/familienarchiv/user/AdminControllerTest.java
index b87b928b..8e51fad7 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/user/AdminControllerTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/user/AdminControllerTest.java
@@ -7,7 +7,8 @@ import org.raddatz.familienarchiv.security.PermissionAspect;
import org.raddatz.familienarchiv.user.CustomUserDetailsService;
import org.raddatz.familienarchiv.document.DocumentService;
import org.raddatz.familienarchiv.document.DocumentVersionService;
-import org.raddatz.familienarchiv.importing.MassImportService;
+import org.raddatz.familienarchiv.importing.CanonicalImportOrchestrator;
+import org.raddatz.familienarchiv.importing.ImportStatus;
import org.raddatz.familienarchiv.document.ThumbnailBackfillService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.aop.AopAutoConfiguration;
@@ -35,7 +36,7 @@ class AdminControllerTest {
@Autowired MockMvc mockMvc;
- @MockitoBean MassImportService massImportService;
+ @MockitoBean CanonicalImportOrchestrator importOrchestrator;
@MockitoBean DocumentService documentService;
@MockitoBean DocumentVersionService documentVersionService;
@MockitoBean ThumbnailBackfillService thumbnailBackfillService;
@@ -46,9 +47,9 @@ class AdminControllerTest {
@Test
@WithMockUser(authorities = "ADMIN")
void importStatus_returns200_withStatusCode_whenAdmin() throws Exception {
- MassImportService.ImportStatus status = new MassImportService.ImportStatus(
- MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null);
- when(massImportService.getStatus()).thenReturn(status);
+ ImportStatus status = new ImportStatus(
+ ImportStatus.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null);
+ when(importOrchestrator.getStatus()).thenReturn(status);
mockMvc.perform(get("/api/admin/import-status"))
.andExpect(status().isOk())
@@ -60,9 +61,9 @@ class AdminControllerTest {
@Test
@WithMockUser(authorities = "ADMIN")
void importStatus_messageField_notPresentInApiResponse() throws Exception {
- MassImportService.ImportStatus status = new MassImportService.ImportStatus(
- MassImportService.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null);
- when(massImportService.getStatus()).thenReturn(status);
+ ImportStatus status = new ImportStatus(
+ ImportStatus.State.IDLE, "IMPORT_IDLE", "Kein Import gestartet.", 0, List.of(), null);
+ when(importOrchestrator.getStatus()).thenReturn(status);
mockMvc.perform(get("/api/admin/import-status"))
.andExpect(status().isOk())
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index cdae6581..26e07442 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -26,15 +26,19 @@
# MAIL_HOST, MAIL_PORT, SMTP relay (production only; staging uses mailpit)
# MAIL_USERNAME, MAIL_PASSWORD
# APP_MAIL_FROM sender address (e.g. noreply@raddatz.cloud)
-# IMPORT_HOST_DIR absolute host path holding ONLY the ODS
-# spreadsheet and PDFs for /admin/system mass
+# IMPORT_HOST_DIR absolute host path holding the canonical
+# import artifacts (canonical-*.xlsx +
+# canonical-persons-tree.json) and the
+# .pdf files for /admin/system
# import — mounted read-only at /import inside
# the backend. Compose refuses to start when
# this var is unset, so staging and prod cannot
# accidentally share an import source. Must be
# readable by the backend container's UID
# (currently root via the OpenJDK image — any
-# world-readable directory works).
+# world-readable directory works). Canonical
+# artifacts are NOT in git (PII — ADR-025); ops
+# syncs them in beside the PDFs out-of-band.
networks:
archiv-net:
@@ -217,12 +221,17 @@ services:
# Bound to localhost only — Caddy fronts external traffic.
ports:
- "127.0.0.1:${PORT_BACKEND}:8080"
- # Host path holding the ODS spreadsheet + PDFs for the mass-import endpoint.
- # Read-only; MassImportService only reads (Files.list / Files.walk on /import).
+ # Host path holding the canonical import artifacts (canonical-*.xlsx +
+ # canonical-persons-tree.json) + .pdf files for the import endpoint.
+ # Read-only; the canonical importer only reads them from /import.
# Required — no default — so staging and prod cannot accidentally share an
# import source. CI workflows pin this per-env (see .gitea/workflows/).
+ # NOTE: the canonical artifacts are NOT version-controlled (they contain real
+ # family PII — see ADR-025). Ops must produce them locally from the Python
+ # normalizer (tools/import-normalizer/) and sync them into this host path
+ # alongside the .pdf corpus before triggering an import.
volumes:
- - ${IMPORT_HOST_DIR:?Set IMPORT_HOST_DIR to a host path holding the mass-import payload (ODS + PDFs). See docs/DEPLOYMENT.md.}:/import:ro
+ - ${IMPORT_HOST_DIR:?Set IMPORT_HOST_DIR to a host path holding the import payload (canonical artifacts + .pdf files). See docs/DEPLOYMENT.md.}:/import:ro
environment:
SPRING_DATASOURCE_URL: jdbc:postgresql://db:5432/archiv
SPRING_DATASOURCE_USERNAME: archiv
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index de071a43..5bc46261 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -65,7 +65,7 @@ Members of the cross-cutting layer have no entity of their own, no user-facing C
| `dashboard` | Stats aggregation for the admin dashboard and Family Pulse widget | Aggregates from 3+ domains; no owned entities |
| `exception` | `DomainException`, `ErrorCode` enum, `GlobalExceptionHandler` | Framework infra; consumed by every controller and service. Adding a new `ErrorCode` requires matching updates in `frontend/src/lib/shared/errors.ts` and all three `messages/*.json` locale files. Current security-related codes: `CSRF_TOKEN_MISSING` (403 on mutating request without valid `X-XSRF-TOKEN` header), `TOO_MANY_LOGIN_ATTEMPTS` (429 when login rate limit exceeded). |
| `filestorage` | `FileService` — MinIO/S3 upload, download, presigned-URL generation | Generic service; consumed by `document` and `ocr` |
-| `importing` | `MassImportService` — async ODS/Excel batch import | Orchestrates across `person`, `tag`, `document` |
+| `importing` | `CanonicalImportOrchestrator` — async canonical import running four idempotent loaders (`TagTreeImporter` → `PersonRegisterImporter` → `PersonTreeImporter` → `DocumentImporter`) over the normalizer's canonical artifacts (`canonical-*.xlsx` + `canonical-persons-tree.json`; not version-controlled — PII, see ADR-025) | Orchestrates across `person`, `tag`, `document` |
| `security` | `SecurityConfig`, `Permission` enum, `@RequirePermission` annotation, `PermissionAspect` (AOP) | Framework infra; enforced globally across all controllers |
**Frontend `shared/`** follows the same admission criteria. Key members: `api.server.ts` (typed openapi-fetch client factory), `errors.ts` (backend `ErrorCode` → i18n mapping), `shared/primitives/` (generic UI components used across ≥2 domains), `shared/discussion/` (comment/mention editor used by `document` and `geschichte`), `shared/utils/` (pure date/sort/debounce utilities).
diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md
index c6560a0a..2e79481e 100644
--- a/docs/DEPLOYMENT.md
+++ b/docs/DEPLOYMENT.md
@@ -99,7 +99,7 @@ All vars are set in `.env` at the repo root (copy from `.env.example`). The back
| `APP_BASE_URL` | Public-facing URL for email links | `http://localhost:3000` | YES (prod) | — |
| `APP_OCR_BASE_URL` | Internal URL of the OCR service | — | YES | — |
| `APP_OCR_TRAINING_TOKEN` | Secret token for OCR training endpoints | — | YES (prod) | YES |
-| `IMPORT_HOST_DIR` | Absolute host path holding the ODS spreadsheet + PDFs for the `/admin/system` mass-import card. Mounted read-only at `/import` inside the backend (compose-only — backend reads via `app.import.dir`). Compose refuses to start when unset, so staging and prod cannot accidentally share the source. Convention: `/srv/familienarchiv-staging/import` and `/srv/familienarchiv-production/import` | — | YES (prod compose) | — |
+| `IMPORT_HOST_DIR` | Absolute host path holding the normalizer's canonical artifacts (`canonical-{documents,persons,tag-tree}.xlsx` + `canonical-persons-tree.json`) **plus the `.pdf` files** for the `/admin/system` import. Mounted read-only at `/import` inside the backend (the canonical importer reads via `app.import.dir`). Compose refuses to start when unset, so staging and prod cannot accidentally share the source. Convention: `/srv/familienarchiv-staging/import` and `/srv/familienarchiv-production/import` | — | YES (prod compose) | — |
| `MAIL_HOST` | SMTP host | `mailpit` (dev) | YES (prod) | — |
| `MAIL_PORT` | SMTP port | `1025` (dev) | YES (prod) | — |
| `MAIL_USERNAME` | SMTP username | — | YES (prod) | YES |
@@ -559,20 +559,45 @@ bash scripts/download-kraken-models.sh
> Downloads the Kurrent/Sütterlin HTR models. Run once after a fresh clone or when models are updated.
-### Trigger a mass import (Excel/ODS)
+### Trigger a canonical import
-**Dev:** drop the ODS spreadsheet + PDFs into `./import/` at the repo root — the dev compose bind-mounts it to `/import` automatically.
+The importer no longer parses the raw spreadsheet. It consumes the **canonical artifacts**
+produced by the normalizer (`tools/import-normalizer/`) — `canonical-tag-tree.xlsx`,
+`canonical-persons.xlsx`, `canonical-persons-tree.json`, `canonical-documents.xlsx` — which
+the normalizer writes to `tools/import-normalizer/out/` (not committed to git — they contain real family PII). The semantic transformation
+(German-date parsing, name classification) lives entirely in the normalizer; the backend
+maps the clean columns by header name. See [ADR-025](adr/025-canonical-import-and-single-migration-schema-foundation.md).
+
+**Prerequisite — regenerate the artifacts when the source data changes:**
+
+```bash
+cd tools/import-normalizer
+python3 -m venv .venv && .venv/bin/pip install -r requirements.txt # once, on a fresh clone
+.venv/bin/python normalize.py
+# writes the four canonical artifacts into ./out/
+```
+
+**Dev:** place all four canonical artifacts **plus** the PDFs into `./import/`
+at the repo root (the dev compose bind-mounts it to `/import`, which is `app.import.dir`).
+Each PDF must be named `