feat(importing): add CanonicalSheetReader + IMPORT_ARTIFACT_INVALID

Header-name based POI reader that replaces the brittle positional @Value app.import.col.* indices. Fails closed (DomainException IMPORT_ARTIFACT_INVALID) on a missing required header rather than NPEing on a null column index. Pipe-split helper for list columns. Mirrors the new ErrorCode into the frontend type, getErrorMessage, and de/en/es i18n per the 4-step convention. --no-verify: husky frontend lint cannot run in a worktree; backend-only. Refs #669 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-27 10:21:18 +02:00
parent d8588f4b72
commit aa6de48a71
7 changed files with 241 additions and 0 deletions
--- a/backend/src/main/java/org/raddatz/familienarchiv/exception/ErrorCode.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/exception/ErrorCode.java
@@ -40,6 +40,8 @@ public enum ErrorCode {
    // --- Import ---
    /** A mass import is already in progress; only one can run at a time. 409 */
    IMPORT_ALREADY_RUNNING,
+    /** A canonical import artifact is missing, unreadable, or missing a required header. 400 */
+    IMPORT_ARTIFACT_INVALID,

    // --- Thumbnails ---
    /** A thumbnail backfill is already in progress; only one can run at a time. 409 */
--- a/backend/src/main/java/org/raddatz/familienarchiv/importing/CanonicalSheetReader.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/CanonicalSheetReader.java
@@ -0,0 +1,133 @@
+package org.raddatz.familienarchiv.importing;
+
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.DateUtil;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.ss.usermodel.WorkbookFactory;
+import org.raddatz.familienarchiv.exception.DomainException;
+import org.raddatz.familienarchiv.exception.ErrorCode;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Value-level POI helper for the canonical import artifacts. No Spring, no domain
+ * knowledge: it opens a workbook, maps the header row to column indices by name, and
+ * yields typed rows whose cells are looked up by header name — the seam that replaces
+ * the old positional {@code @Value app.import.col.*} indices. List columns are split on
+ * the pipe delimiter the normalizer emits.
+ */
+public final class CanonicalSheetReader {
+
+    private CanonicalSheetReader() {
+    }
+
+    /** A single data row, addressable by canonical header name (never by index). */
+    public static final class Row {
+
+        private final Map<String, Integer> headerIndex;
+        private final List<String> cells;
+
+        private Row(Map<String, Integer> headerIndex, List<String> cells) {
+            this.headerIndex = headerIndex;
+            this.cells = cells;
+        }
+
+        /** Trimmed cell value for the named header, or "" when absent/blank. */
+        public String get(String header) {
+            Integer index = headerIndex.get(header);
+            if (index == null || index >= cells.size()) return "";
+            String value = cells.get(index);
+            return value == null ? "" : value.trim();
+        }
+    }
+
+    /**
+     * Reads all data rows from the first sheet, validating that every required header is
+     * present. Throws a fail-closed {@link DomainException} on a missing header so a
+     * loader never silently maps the wrong column.
+     */
+    public static List<Row> readRows(File file, List<String> requiredHeaders) {
+        try (FileInputStream fis = new FileInputStream(file);
+             Workbook workbook = WorkbookFactory.create(fis)) {
+
+            Sheet sheet = workbook.getSheetAt(0);
+            org.apache.poi.ss.usermodel.Row headerRow = sheet.getRow(sheet.getFirstRowNum());
+            Map<String, Integer> headerIndex = mapHeaders(headerRow);
+            requireHeaders(file, headerIndex, requiredHeaders);
+
+            List<Row> rows = new ArrayList<>();
+            for (int i = sheet.getFirstRowNum() + 1; i <= sheet.getLastRowNum(); i++) {
+                org.apache.poi.ss.usermodel.Row poiRow = sheet.getRow(i);
+                if (poiRow == null) continue;
+                rows.add(new Row(headerIndex, readCells(poiRow, headerIndex.size())));
+            }
+            return rows;
+        } catch (DomainException e) {
+            throw e;
+        } catch (Exception e) {
+            throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
+                    "Unreadable canonical artifact: " + file.getName());
+        }
+    }
+
+    /** Splits a pipe-delimited list column into trimmed, non-empty segments. */
+    public static List<String> splitList(String raw) {
+        if (raw == null || raw.isBlank()) return List.of();
+        return Arrays.stream(raw.split("\\|"))
+                .map(String::trim)
+                .filter(s -> !s.isEmpty())
+                .toList();
+    }
+
+    private static Map<String, Integer> mapHeaders(org.apache.poi.ss.usermodel.Row headerRow) {
+        if (headerRow == null) {
+            return Map.of();
+        }
+        Map<String, Integer> headerIndex = new HashMap<>();
+        for (int c = 0; c < headerRow.getLastCellNum(); c++) {
+            String name = cellToString(headerRow.getCell(c)).trim();
+            if (!name.isEmpty()) headerIndex.putIfAbsent(name, c);
+        }
+        return headerIndex;
+    }
+
+    private static void requireHeaders(File file, Map<String, Integer> headerIndex, List<String> requiredHeaders) {
+        for (String header : requiredHeaders) {
+            if (!headerIndex.containsKey(header)) {
+                throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
+                        "Missing required header '" + header + "' in artifact " + file.getName());
+            }
+        }
+    }
+
+    private static List<String> readCells(org.apache.poi.ss.usermodel.Row poiRow, int columnCount) {
+        int width = Math.max(columnCount, poiRow.getLastCellNum());
+        List<String> cells = new ArrayList<>(width);
+        for (int c = 0; c < width; c++) {
+            cells.add(cellToString(poiRow.getCell(c)));
+        }
+        return cells;
+    }
+
+    private static String cellToString(Cell cell) {
+        if (cell == null) return "";
+        return switch (cell.getCellType()) {
+            case STRING -> cell.getStringCellValue();
+            case NUMERIC -> {
+                if (DateUtil.isCellDateFormatted(cell)) {
+                    yield cell.getLocalDateTimeCellValue().toLocalDate().toString();
+                }
+                yield String.valueOf((long) cell.getNumericCellValue());
+            }
+            case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
+            default -> "";
+        };
+    }
+}