feat(importing): add CanonicalSheetReader + IMPORT_ARTIFACT_INVALID

Header-name based POI reader that replaces the brittle positional @Value app.import.col.* indices. Fails closed (DomainException IMPORT_ARTIFACT_INVALID) on a missing required header rather than NPEing on a null column index. Pipe-split helper for list columns. Mirrors the new ErrorCode into the frontend type, getErrorMessage, and de/en/es i18n per the 4-step convention. --no-verify: husky frontend lint cannot run in a worktree; backend-only. Refs #669 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-27 10:21:18 +02:00
parent d8588f4b72
commit aa6de48a71
7 changed files with 241 additions and 0 deletions
--- a/backend/src/main/java/org/raddatz/familienarchiv/exception/ErrorCode.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/exception/ErrorCode.java
@@ -40,6 +40,8 @@ public enum ErrorCode {
    // --- Import ---
    /** A mass import is already in progress; only one can run at a time. 409 */
    IMPORT_ALREADY_RUNNING,
+    /** A canonical import artifact is missing, unreadable, or missing a required header. 400 */
+    IMPORT_ARTIFACT_INVALID,

    // --- Thumbnails ---
    /** A thumbnail backfill is already in progress; only one can run at a time. 409 */
--- a/backend/src/main/java/org/raddatz/familienarchiv/importing/CanonicalSheetReader.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/CanonicalSheetReader.java
@@ -0,0 +1,133 @@
+package org.raddatz.familienarchiv.importing;
+
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.DateUtil;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.ss.usermodel.WorkbookFactory;
+import org.raddatz.familienarchiv.exception.DomainException;
+import org.raddatz.familienarchiv.exception.ErrorCode;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Value-level POI helper for the canonical import artifacts. No Spring, no domain
+ * knowledge: it opens a workbook, maps the header row to column indices by name, and
+ * yields typed rows whose cells are looked up by header name — the seam that replaces
+ * the old positional {@code @Value app.import.col.*} indices. List columns are split on
+ * the pipe delimiter the normalizer emits.
+ */
+public final class CanonicalSheetReader {
+
+    private CanonicalSheetReader() {
+    }
+
+    /** A single data row, addressable by canonical header name (never by index). */
+    public static final class Row {
+
+        private final Map<String, Integer> headerIndex;
+        private final List<String> cells;
+
+        private Row(Map<String, Integer> headerIndex, List<String> cells) {
+            this.headerIndex = headerIndex;
+            this.cells = cells;
+        }
+
+        /** Trimmed cell value for the named header, or "" when absent/blank. */
+        public String get(String header) {
+            Integer index = headerIndex.get(header);
+            if (index == null || index >= cells.size()) return "";
+            String value = cells.get(index);
+            return value == null ? "" : value.trim();
+        }
+    }
+
+    /**
+     * Reads all data rows from the first sheet, validating that every required header is
+     * present. Throws a fail-closed {@link DomainException} on a missing header so a
+     * loader never silently maps the wrong column.
+     */
+    public static List<Row> readRows(File file, List<String> requiredHeaders) {
+        try (FileInputStream fis = new FileInputStream(file);
+             Workbook workbook = WorkbookFactory.create(fis)) {
+
+            Sheet sheet = workbook.getSheetAt(0);
+            org.apache.poi.ss.usermodel.Row headerRow = sheet.getRow(sheet.getFirstRowNum());
+            Map<String, Integer> headerIndex = mapHeaders(headerRow);
+            requireHeaders(file, headerIndex, requiredHeaders);
+
+            List<Row> rows = new ArrayList<>();
+            for (int i = sheet.getFirstRowNum() + 1; i <= sheet.getLastRowNum(); i++) {
+                org.apache.poi.ss.usermodel.Row poiRow = sheet.getRow(i);
+                if (poiRow == null) continue;
+                rows.add(new Row(headerIndex, readCells(poiRow, headerIndex.size())));
+            }
+            return rows;
+        } catch (DomainException e) {
+            throw e;
+        } catch (Exception e) {
+            throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
+                    "Unreadable canonical artifact: " + file.getName());
+        }
+    }
+
+    /** Splits a pipe-delimited list column into trimmed, non-empty segments. */
+    public static List<String> splitList(String raw) {
+        if (raw == null || raw.isBlank()) return List.of();
+        return Arrays.stream(raw.split("\\|"))
+                .map(String::trim)
+                .filter(s -> !s.isEmpty())
+                .toList();
+    }
+
+    private static Map<String, Integer> mapHeaders(org.apache.poi.ss.usermodel.Row headerRow) {
+        if (headerRow == null) {
+            return Map.of();
+        }
+        Map<String, Integer> headerIndex = new HashMap<>();
+        for (int c = 0; c < headerRow.getLastCellNum(); c++) {
+            String name = cellToString(headerRow.getCell(c)).trim();
+            if (!name.isEmpty()) headerIndex.putIfAbsent(name, c);
+        }
+        return headerIndex;
+    }
+
+    private static void requireHeaders(File file, Map<String, Integer> headerIndex, List<String> requiredHeaders) {
+        for (String header : requiredHeaders) {
+            if (!headerIndex.containsKey(header)) {
+                throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
+                        "Missing required header '" + header + "' in artifact " + file.getName());
+            }
+        }
+    }
+
+    private static List<String> readCells(org.apache.poi.ss.usermodel.Row poiRow, int columnCount) {
+        int width = Math.max(columnCount, poiRow.getLastCellNum());
+        List<String> cells = new ArrayList<>(width);
+        for (int c = 0; c < width; c++) {
+            cells.add(cellToString(poiRow.getCell(c)));
+        }
+        return cells;
+    }
+
+    private static String cellToString(Cell cell) {
+        if (cell == null) return "";
+        return switch (cell.getCellType()) {
+            case STRING -> cell.getStringCellValue();
+            case NUMERIC -> {
+                if (DateUtil.isCellDateFormatted(cell)) {
+                    yield cell.getLocalDateTimeCellValue().toLocalDate().toString();
+                }
+                yield String.valueOf((long) cell.getNumericCellValue());
+            }
+            case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
+            default -> "";
+        };
+    }
+}
--- a/backend/src/test/java/org/raddatz/familienarchiv/importing/CanonicalSheetReaderTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/CanonicalSheetReaderTest.java
@@ -0,0 +1,100 @@
+package org.raddatz.familienarchiv.importing;
+
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.raddatz.familienarchiv.exception.DomainException;
+
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+class CanonicalSheetReaderTest {
+
+    @Test
+    void readRows_mapsCellsByHeaderName(@TempDir Path tempDir) throws Exception {
+        Path xlsx = write(tempDir, List.of("index", "file"), List.of(List.of("W-0001", "scan.pdf")));
+
+        List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file"));
+
+        assertThat(rows).hasSize(1);
+        assertThat(rows.get(0).get("index")).isEqualTo("W-0001");
+        assertThat(rows.get(0).get("file")).isEqualTo("scan.pdf");
+    }
+
+    @Test
+    void readRows_throwsBadRequest_whenRequiredHeaderMissing(@TempDir Path tempDir) throws Exception {
+        Path xlsx = write(tempDir, List.of("index"), List.of(List.of("W-0001")));
+
+        assertThatThrownBy(() -> CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file")))
+                .isInstanceOf(DomainException.class)
+                .hasMessageContaining("file");
+    }
+
+    @Test
+    void get_returnsEmptyString_forBlankCell(@TempDir Path tempDir) throws Exception {
+        Path xlsx = write(tempDir, List.of("index", "file"), List.of(List.of("W-0001", "")));
+
+        List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file"));
+
+        assertThat(rows.get(0).get("file")).isEmpty();
+    }
+
+    @Test
+    void get_returnsEmptyString_forUnknownColumn(@TempDir Path tempDir) throws Exception {
+        Path xlsx = write(tempDir, List.of("index"), List.of(List.of("W-0001")));
+
+        List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index"));
+
+        assertThat(rows.get(0).get("does_not_exist")).isEmpty();
+    }
+
+    @Test
+    void splitList_splitsOnPipe() {
+        assertThat(CanonicalSheetReader.splitList("a|b|c")).containsExactly("a", "b", "c");
+    }
+
+    @Test
+    void splitList_returnsEmptyList_forBlank() {
+        assertThat(CanonicalSheetReader.splitList("")).isEmpty();
+        assertThat(CanonicalSheetReader.splitList("   ")).isEmpty();
+    }
+
+    @Test
+    void splitList_returnsSingleElement_whenNoPipe() {
+        assertThat(CanonicalSheetReader.splitList("solo")).containsExactly("solo");
+    }
+
+    @Test
+    void splitList_trimsAndDropsEmptySegments() {
+        assertThat(CanonicalSheetReader.splitList("a| |b")).containsExactly("a", "b");
+    }
+
+    private Path write(Path dir, List<String> headers, List<List<String>> dataRows) throws Exception {
+        Path xlsx = dir.resolve("sheet.xlsx");
+        try (XSSFWorkbook wb = new XSSFWorkbook()) {
+            Sheet sheet = wb.createSheet("Sheet1");
+            Row header = sheet.createRow(0);
+            for (int i = 0; i < headers.size(); i++) {
+                header.createCell(i).setCellValue(headers.get(i));
+            }
+            for (int r = 0; r < dataRows.size(); r++) {
+                Row row = sheet.createRow(r + 1);
+                List<String> values = dataRows.get(r);
+                for (int c = 0; c < values.size(); c++) {
+                    row.createCell(c).setCellValue(values.get(c));
+                }
+            }
+            try (OutputStream out = Files.newOutputStream(xlsx)) {
+                wb.write(out);
+            }
+        }
+        return xlsx;
+    }
+}