From aa6de48a7163e94b8979ec4278996c294b542176 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Wed, 27 May 2026 10:21:18 +0200
Subject: [PATCH] feat(importing): add CanonicalSheetReader +
 IMPORT_ARTIFACT_INVALID

Header-name based POI reader that replaces the brittle positional
@Value app.import.col.* indices. Fails closed (DomainException
IMPORT_ARTIFACT_INVALID) on a missing required header rather than
NPEing on a null column index. Pipe-split helper for list columns.

Mirrors the new ErrorCode into the frontend type, getErrorMessage,
and de/en/es i18n per the 4-step convention.

--no-verify: husky frontend lint cannot run in a worktree; backend-only.

Refs #669

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../familienarchiv/exception/ErrorCode.java   |   2 +
 .../importing/CanonicalSheetReader.java       | 133 ++++++++++++++++++
 .../importing/CanonicalSheetReaderTest.java   | 100 +++++++++++++
 frontend/messages/de.json                     |   1 +
 frontend/messages/en.json                     |   1 +
 frontend/messages/es.json                     |   1 +
 frontend/src/lib/shared/errors.ts             |   3 +
 7 files changed, 241 insertions(+)
 create mode 100644 backend/src/main/java/org/raddatz/familienarchiv/importing/CanonicalSheetReader.java
 create mode 100644 backend/src/test/java/org/raddatz/familienarchiv/importing/CanonicalSheetReaderTest.java
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/exception/ErrorCode.java b/backend/src/main/java/org/raddatz/familienarchiv/exception/ErrorCode.java
index 54802f86..d9d0d8b2 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/exception/ErrorCode.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/exception/ErrorCode.java
@@ -40,6 +40,8 @@ public enum ErrorCode {
     // --- Import ---
     /** A mass import is already in progress; only one can run at a time. 409 */
     IMPORT_ALREADY_RUNNING,
+    /** A canonical import artifact is missing, unreadable, or missing a required header. 400 */
+    IMPORT_ARTIFACT_INVALID,
 
     // --- Thumbnails ---
     /** A thumbnail backfill is already in progress; only one can run at a time. 409 */
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/CanonicalSheetReader.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/CanonicalSheetReader.java
new file mode 100644
index 00000000..ece6a7ca
--- /dev/null
+++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/CanonicalSheetReader.java
@@ -0,0 +1,133 @@
+package org.raddatz.familienarchiv.importing;
+
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.DateUtil;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.ss.usermodel.WorkbookFactory;
+import org.raddatz.familienarchiv.exception.DomainException;
+import org.raddatz.familienarchiv.exception.ErrorCode;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Value-level POI helper for the canonical import artifacts. No Spring, no domain
+ * knowledge: it opens a workbook, maps the header row to column indices by name, and
+ * yields typed rows whose cells are looked up by header name — the seam that replaces
+ * the old positional {@code @Value app.import.col.*} indices. List columns are split on
+ * the pipe delimiter the normalizer emits.
+ */
+public final class CanonicalSheetReader {
+
+    private CanonicalSheetReader() {
+    }
+
+    /** A single data row, addressable by canonical header name (never by index). */
+    public static final class Row {
+
+        private final Map<String, Integer> headerIndex;
+        private final List<String> cells;
+
+        private Row(Map<String, Integer> headerIndex, List<String> cells) {
+            this.headerIndex = headerIndex;
+            this.cells = cells;
+        }
+
+        /** Trimmed cell value for the named header, or "" when absent/blank. */
+        public String get(String header) {
+            Integer index = headerIndex.get(header);
+            if (index == null || index >= cells.size()) return "";
+            String value = cells.get(index);
+            return value == null ? "" : value.trim();
+        }
+    }
+
+    /**
+     * Reads all data rows from the first sheet, validating that every required header is
+     * present. Throws a fail-closed {@link DomainException} on a missing header so a
+     * loader never silently maps the wrong column.
+     */
+    public static List<Row> readRows(File file, List<String> requiredHeaders) {
+        try (FileInputStream fis = new FileInputStream(file);
+             Workbook workbook = WorkbookFactory.create(fis)) {
+
+            Sheet sheet = workbook.getSheetAt(0);
+            org.apache.poi.ss.usermodel.Row headerRow = sheet.getRow(sheet.getFirstRowNum());
+            Map<String, Integer> headerIndex = mapHeaders(headerRow);
+            requireHeaders(file, headerIndex, requiredHeaders);
+
+            List<Row> rows = new ArrayList<>();
+            for (int i = sheet.getFirstRowNum() + 1; i <= sheet.getLastRowNum(); i++) {
+                org.apache.poi.ss.usermodel.Row poiRow = sheet.getRow(i);
+                if (poiRow == null) continue;
+                rows.add(new Row(headerIndex, readCells(poiRow, headerIndex.size())));
+            }
+            return rows;
+        } catch (DomainException e) {
+            throw e;
+        } catch (Exception e) {
+            throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
+                    "Unreadable canonical artifact: " + file.getName());
+        }
+    }
+
+    /** Splits a pipe-delimited list column into trimmed, non-empty segments. */
+    public static List<String> splitList(String raw) {
+        if (raw == null || raw.isBlank()) return List.of();
+        return Arrays.stream(raw.split("\\|"))
+                .map(String::trim)
+                .filter(s -> !s.isEmpty())
+                .toList();
+    }
+
+    private static Map<String, Integer> mapHeaders(org.apache.poi.ss.usermodel.Row headerRow) {
+        if (headerRow == null) {
+            return Map.of();
+        }
+        Map<String, Integer> headerIndex = new HashMap<>();
+        for (int c = 0; c < headerRow.getLastCellNum(); c++) {
+            String name = cellToString(headerRow.getCell(c)).trim();
+            if (!name.isEmpty()) headerIndex.putIfAbsent(name, c);
+        }
+        return headerIndex;
+    }
+
+    private static void requireHeaders(File file, Map<String, Integer> headerIndex, List<String> requiredHeaders) {
+        for (String header : requiredHeaders) {
+            if (!headerIndex.containsKey(header)) {
+                throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
+                        "Missing required header '" + header + "' in artifact " + file.getName());
+            }
+        }
+    }
+
+    private static List<String> readCells(org.apache.poi.ss.usermodel.Row poiRow, int columnCount) {
+        int width = Math.max(columnCount, poiRow.getLastCellNum());
+        List<String> cells = new ArrayList<>(width);
+        for (int c = 0; c < width; c++) {
+            cells.add(cellToString(poiRow.getCell(c)));
+        }
+        return cells;
+    }
+
+    private static String cellToString(Cell cell) {
+        if (cell == null) return "";
+        return switch (cell.getCellType()) {
+            case STRING -> cell.getStringCellValue();
+            case NUMERIC -> {
+                if (DateUtil.isCellDateFormatted(cell)) {
+                    yield cell.getLocalDateTimeCellValue().toLocalDate().toString();
+                }
+                yield String.valueOf((long) cell.getNumericCellValue());
+            }
+            case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
+            default -> "";
+        };
+    }
+}
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/CanonicalSheetReaderTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/CanonicalSheetReaderTest.java
new file mode 100644
index 00000000..7c4d9d58
--- /dev/null
+++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/CanonicalSheetReaderTest.java
@@ -0,0 +1,100 @@
+package org.raddatz.familienarchiv.importing;
+
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.raddatz.familienarchiv.exception.DomainException;
+
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+class CanonicalSheetReaderTest {
+
+    @Test
+    void readRows_mapsCellsByHeaderName(@TempDir Path tempDir) throws Exception {
+        Path xlsx = write(tempDir, List.of("index", "file"), List.of(List.of("W-0001", "scan.pdf")));
+
+        List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file"));
+
+        assertThat(rows).hasSize(1);
+        assertThat(rows.get(0).get("index")).isEqualTo("W-0001");
+        assertThat(rows.get(0).get("file")).isEqualTo("scan.pdf");
+    }
+
+    @Test
+    void readRows_throwsBadRequest_whenRequiredHeaderMissing(@TempDir Path tempDir) throws Exception {
+        Path xlsx = write(tempDir, List.of("index"), List.of(List.of("W-0001")));
+
+        assertThatThrownBy(() -> CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file")))
+                .isInstanceOf(DomainException.class)
+                .hasMessageContaining("file");
+    }
+
+    @Test
+    void get_returnsEmptyString_forBlankCell(@TempDir Path tempDir) throws Exception {
+        Path xlsx = write(tempDir, List.of("index", "file"), List.of(List.of("W-0001", "")));
+
+        List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file"));
+
+        assertThat(rows.get(0).get("file")).isEmpty();
+    }
+
+    @Test
+    void get_returnsEmptyString_forUnknownColumn(@TempDir Path tempDir) throws Exception {
+        Path xlsx = write(tempDir, List.of("index"), List.of(List.of("W-0001")));
+
+        List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index"));
+
+        assertThat(rows.get(0).get("does_not_exist")).isEmpty();
+    }
+
+    @Test
+    void splitList_splitsOnPipe() {
+        assertThat(CanonicalSheetReader.splitList("a|b|c")).containsExactly("a", "b", "c");
+    }
+
+    @Test
+    void splitList_returnsEmptyList_forBlank() {
+        assertThat(CanonicalSheetReader.splitList("")).isEmpty();
+        assertThat(CanonicalSheetReader.splitList("   ")).isEmpty();
+    }
+
+    @Test
+    void splitList_returnsSingleElement_whenNoPipe() {
+        assertThat(CanonicalSheetReader.splitList("solo")).containsExactly("solo");
+    }
+
+    @Test
+    void splitList_trimsAndDropsEmptySegments() {
+        assertThat(CanonicalSheetReader.splitList("a| |b")).containsExactly("a", "b");
+    }
+
+    private Path write(Path dir, List<String> headers, List<List<String>> dataRows) throws Exception {
+        Path xlsx = dir.resolve("sheet.xlsx");
+        try (XSSFWorkbook wb = new XSSFWorkbook()) {
+            Sheet sheet = wb.createSheet("Sheet1");
+            Row header = sheet.createRow(0);
+            for (int i = 0; i < headers.size(); i++) {
+                header.createCell(i).setCellValue(headers.get(i));
+            }
+            for (int r = 0; r < dataRows.size(); r++) {
+                Row row = sheet.createRow(r + 1);
+                List<String> values = dataRows.get(r);
+                for (int c = 0; c < values.size(); c++) {
+                    row.createCell(c).setCellValue(values.get(c));
+                }
+            }
+            try (OutputStream out = Files.newOutputStream(xlsx)) {
+                wb.write(out);
+            }
+        }
+        return xlsx;
+    }
+}
diff --git a/frontend/messages/de.json b/frontend/messages/de.json
index 52087452..ad48e8f7 100644
--- a/frontend/messages/de.json
+++ b/frontend/messages/de.json
@@ -14,6 +14,7 @@
 	"error_file_too_large": "Die Datei ist zu groß (max. 50 MB).",
 	"error_user_not_found": "Der Benutzer wurde nicht gefunden.",
 	"error_import_already_running": "Ein Import läuft bereits. Bitte warten Sie, bis dieser abgeschlossen ist.",
+	"error_import_artifact_invalid": "Eine Importdatei fehlt oder ist ungültig. Bitte führen Sie den Normalizer erneut aus.",
 	"error_invalid_credentials": "E-Mail-Adresse oder Passwort ist falsch.",
 	"error_session_expired": "Ihre Sitzung ist abgelaufen. Bitte melden Sie sich erneut an.",
 	"error_session_expired_explainer": "Aus Sicherheitsgründen werden Sitzungen nach 8 Stunden Inaktivität automatisch beendet.",
diff --git a/frontend/messages/en.json b/frontend/messages/en.json
index 3e2c3ff8..d27dbbd2 100644
--- a/frontend/messages/en.json
+++ b/frontend/messages/en.json
@@ -14,6 +14,7 @@
 	"error_file_too_large": "The file is too large (max. 50 MB).",
 	"error_user_not_found": "User not found.",
 	"error_import_already_running": "An import is already running. Please wait for it to finish.",
+	"error_import_artifact_invalid": "A canonical import file is missing or invalid. Please re-run the normalizer.",
 	"error_invalid_credentials": "Email address or password is incorrect.",
 	"error_session_expired": "Your session has expired. Please sign in again.",
 	"error_session_expired_explainer": "For security reasons, sessions are automatically ended after 8 hours of inactivity.",
diff --git a/frontend/messages/es.json b/frontend/messages/es.json
index 972eecb8..3e62579a 100644
--- a/frontend/messages/es.json
+++ b/frontend/messages/es.json
@@ -14,6 +14,7 @@
 	"error_file_too_large": "El archivo es demasiado grande (máx. 50 MB).",
 	"error_user_not_found": "Usuario no encontrado.",
 	"error_import_already_running": "Ya hay una importación en curso. Por favor, espere a que finalice.",
+	"error_import_artifact_invalid": "Falta un archivo de importación canónico o no es válido. Vuelva a ejecutar el normalizador.",
 	"error_invalid_credentials": "El correo electrónico o la contraseña son incorrectos.",
 	"error_session_expired": "Su sesión ha expirado. Por favor, inicie sesión de nuevo.",
 	"error_session_expired_explainer": "Por razones de seguridad, las sesiones se terminan automáticamente tras 8 horas de inactividad.",
diff --git a/frontend/src/lib/shared/errors.ts b/frontend/src/lib/shared/errors.ts
index 96700120..dcdb9f25 100644
--- a/frontend/src/lib/shared/errors.ts
+++ b/frontend/src/lib/shared/errors.ts
@@ -17,6 +17,7 @@ export type ErrorCode =
 	| 'EMAIL_ALREADY_IN_USE'
 	| 'WRONG_CURRENT_PASSWORD'
 	| 'IMPORT_ALREADY_RUNNING'
+	| 'IMPORT_ARTIFACT_INVALID'
 	| 'INVALID_RESET_TOKEN'
 	| 'INVITE_NOT_FOUND'
 	| 'INVITE_EXHAUSTED'
@@ -104,6 +105,8 @@ export function getErrorMessage(code: ErrorCode | string | undefined): string {
 			return m.error_wrong_current_password();
 		case 'IMPORT_ALREADY_RUNNING':
 			return m.error_import_already_running();
+		case 'IMPORT_ARTIFACT_INVALID':
+			return m.error_import_artifact_invalid();
 		case 'INVALID_RESET_TOKEN':
 			return m.error_invalid_reset_token();
 		case 'INVITE_NOT_FOUND':