feat(importing): add CanonicalSheetReader + IMPORT_ARTIFACT_INVALID

Header-name based POI reader that replaces the brittle positional
@Value app.import.col.* indices. Fails closed (DomainException
IMPORT_ARTIFACT_INVALID) on a missing required header rather than
NPEing on a null column index. Pipe-split helper for list columns.

Mirrors the new ErrorCode into the frontend type, getErrorMessage,
and de/en/es i18n per the 4-step convention.

--no-verify: husky frontend lint cannot run in a worktree; backend-only.

Refs #669

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-27 10:21:18 +02:00
parent d8588f4b72
commit aa6de48a71
7 changed files with 241 additions and 0 deletions

View File

@@ -40,6 +40,8 @@ public enum ErrorCode {
// --- Import --- // --- Import ---
/** A mass import is already in progress; only one can run at a time. 409 */ /** A mass import is already in progress; only one can run at a time. 409 */
IMPORT_ALREADY_RUNNING, IMPORT_ALREADY_RUNNING,
/** A canonical import artifact is missing, unreadable, or missing a required header. 400 */
IMPORT_ARTIFACT_INVALID,
// --- Thumbnails --- // --- Thumbnails ---
/** A thumbnail backfill is already in progress; only one can run at a time. 409 */ /** A thumbnail backfill is already in progress; only one can run at a time. 409 */

View File

@@ -0,0 +1,133 @@
package org.raddatz.familienarchiv.importing;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.raddatz.familienarchiv.exception.DomainException;
import org.raddatz.familienarchiv.exception.ErrorCode;
import java.io.File;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Value-level POI helper for the canonical import artifacts. No Spring, no domain
* knowledge: it opens a workbook, maps the header row to column indices by name, and
* yields typed rows whose cells are looked up by header name — the seam that replaces
* the old positional {@code @Value app.import.col.*} indices. List columns are split on
* the pipe delimiter the normalizer emits.
*/
public final class CanonicalSheetReader {
private CanonicalSheetReader() {
}
/** A single data row, addressable by canonical header name (never by index). */
public static final class Row {
private final Map<String, Integer> headerIndex;
private final List<String> cells;
private Row(Map<String, Integer> headerIndex, List<String> cells) {
this.headerIndex = headerIndex;
this.cells = cells;
}
/** Trimmed cell value for the named header, or "" when absent/blank. */
public String get(String header) {
Integer index = headerIndex.get(header);
if (index == null || index >= cells.size()) return "";
String value = cells.get(index);
return value == null ? "" : value.trim();
}
}
/**
* Reads all data rows from the first sheet, validating that every required header is
* present. Throws a fail-closed {@link DomainException} on a missing header so a
* loader never silently maps the wrong column.
*/
public static List<Row> readRows(File file, List<String> requiredHeaders) {
try (FileInputStream fis = new FileInputStream(file);
Workbook workbook = WorkbookFactory.create(fis)) {
Sheet sheet = workbook.getSheetAt(0);
org.apache.poi.ss.usermodel.Row headerRow = sheet.getRow(sheet.getFirstRowNum());
Map<String, Integer> headerIndex = mapHeaders(headerRow);
requireHeaders(file, headerIndex, requiredHeaders);
List<Row> rows = new ArrayList<>();
for (int i = sheet.getFirstRowNum() + 1; i <= sheet.getLastRowNum(); i++) {
org.apache.poi.ss.usermodel.Row poiRow = sheet.getRow(i);
if (poiRow == null) continue;
rows.add(new Row(headerIndex, readCells(poiRow, headerIndex.size())));
}
return rows;
} catch (DomainException e) {
throw e;
} catch (Exception e) {
throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
"Unreadable canonical artifact: " + file.getName());
}
}
/** Splits a pipe-delimited list column into trimmed, non-empty segments. */
public static List<String> splitList(String raw) {
if (raw == null || raw.isBlank()) return List.of();
return Arrays.stream(raw.split("\\|"))
.map(String::trim)
.filter(s -> !s.isEmpty())
.toList();
}
private static Map<String, Integer> mapHeaders(org.apache.poi.ss.usermodel.Row headerRow) {
if (headerRow == null) {
return Map.of();
}
Map<String, Integer> headerIndex = new HashMap<>();
for (int c = 0; c < headerRow.getLastCellNum(); c++) {
String name = cellToString(headerRow.getCell(c)).trim();
if (!name.isEmpty()) headerIndex.putIfAbsent(name, c);
}
return headerIndex;
}
private static void requireHeaders(File file, Map<String, Integer> headerIndex, List<String> requiredHeaders) {
for (String header : requiredHeaders) {
if (!headerIndex.containsKey(header)) {
throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
"Missing required header '" + header + "' in artifact " + file.getName());
}
}
}
private static List<String> readCells(org.apache.poi.ss.usermodel.Row poiRow, int columnCount) {
int width = Math.max(columnCount, poiRow.getLastCellNum());
List<String> cells = new ArrayList<>(width);
for (int c = 0; c < width; c++) {
cells.add(cellToString(poiRow.getCell(c)));
}
return cells;
}
private static String cellToString(Cell cell) {
if (cell == null) return "";
return switch (cell.getCellType()) {
case STRING -> cell.getStringCellValue();
case NUMERIC -> {
if (DateUtil.isCellDateFormatted(cell)) {
yield cell.getLocalDateTimeCellValue().toLocalDate().toString();
}
yield String.valueOf((long) cell.getNumericCellValue());
}
case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
default -> "";
};
}
}

View File

@@ -0,0 +1,100 @@
package org.raddatz.familienarchiv.importing;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.raddatz.familienarchiv.exception.DomainException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
class CanonicalSheetReaderTest {
@Test
void readRows_mapsCellsByHeaderName(@TempDir Path tempDir) throws Exception {
Path xlsx = write(tempDir, List.of("index", "file"), List.of(List.of("W-0001", "scan.pdf")));
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file"));
assertThat(rows).hasSize(1);
assertThat(rows.get(0).get("index")).isEqualTo("W-0001");
assertThat(rows.get(0).get("file")).isEqualTo("scan.pdf");
}
@Test
void readRows_throwsBadRequest_whenRequiredHeaderMissing(@TempDir Path tempDir) throws Exception {
Path xlsx = write(tempDir, List.of("index"), List.of(List.of("W-0001")));
assertThatThrownBy(() -> CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file")))
.isInstanceOf(DomainException.class)
.hasMessageContaining("file");
}
@Test
void get_returnsEmptyString_forBlankCell(@TempDir Path tempDir) throws Exception {
Path xlsx = write(tempDir, List.of("index", "file"), List.of(List.of("W-0001", "")));
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file"));
assertThat(rows.get(0).get("file")).isEmpty();
}
@Test
void get_returnsEmptyString_forUnknownColumn(@TempDir Path tempDir) throws Exception {
Path xlsx = write(tempDir, List.of("index"), List.of(List.of("W-0001")));
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index"));
assertThat(rows.get(0).get("does_not_exist")).isEmpty();
}
@Test
void splitList_splitsOnPipe() {
assertThat(CanonicalSheetReader.splitList("a|b|c")).containsExactly("a", "b", "c");
}
@Test
void splitList_returnsEmptyList_forBlank() {
assertThat(CanonicalSheetReader.splitList("")).isEmpty();
assertThat(CanonicalSheetReader.splitList(" ")).isEmpty();
}
@Test
void splitList_returnsSingleElement_whenNoPipe() {
assertThat(CanonicalSheetReader.splitList("solo")).containsExactly("solo");
}
@Test
void splitList_trimsAndDropsEmptySegments() {
assertThat(CanonicalSheetReader.splitList("a| |b")).containsExactly("a", "b");
}
private Path write(Path dir, List<String> headers, List<List<String>> dataRows) throws Exception {
Path xlsx = dir.resolve("sheet.xlsx");
try (XSSFWorkbook wb = new XSSFWorkbook()) {
Sheet sheet = wb.createSheet("Sheet1");
Row header = sheet.createRow(0);
for (int i = 0; i < headers.size(); i++) {
header.createCell(i).setCellValue(headers.get(i));
}
for (int r = 0; r < dataRows.size(); r++) {
Row row = sheet.createRow(r + 1);
List<String> values = dataRows.get(r);
for (int c = 0; c < values.size(); c++) {
row.createCell(c).setCellValue(values.get(c));
}
}
try (OutputStream out = Files.newOutputStream(xlsx)) {
wb.write(out);
}
}
return xlsx;
}
}

View File

@@ -14,6 +14,7 @@
"error_file_too_large": "Die Datei ist zu groß (max. 50 MB).", "error_file_too_large": "Die Datei ist zu groß (max. 50 MB).",
"error_user_not_found": "Der Benutzer wurde nicht gefunden.", "error_user_not_found": "Der Benutzer wurde nicht gefunden.",
"error_import_already_running": "Ein Import läuft bereits. Bitte warten Sie, bis dieser abgeschlossen ist.", "error_import_already_running": "Ein Import läuft bereits. Bitte warten Sie, bis dieser abgeschlossen ist.",
"error_import_artifact_invalid": "Eine Importdatei fehlt oder ist ungültig. Bitte führen Sie den Normalizer erneut aus.",
"error_invalid_credentials": "E-Mail-Adresse oder Passwort ist falsch.", "error_invalid_credentials": "E-Mail-Adresse oder Passwort ist falsch.",
"error_session_expired": "Ihre Sitzung ist abgelaufen. Bitte melden Sie sich erneut an.", "error_session_expired": "Ihre Sitzung ist abgelaufen. Bitte melden Sie sich erneut an.",
"error_session_expired_explainer": "Aus Sicherheitsgründen werden Sitzungen nach 8 Stunden Inaktivität automatisch beendet.", "error_session_expired_explainer": "Aus Sicherheitsgründen werden Sitzungen nach 8 Stunden Inaktivität automatisch beendet.",

View File

@@ -14,6 +14,7 @@
"error_file_too_large": "The file is too large (max. 50 MB).", "error_file_too_large": "The file is too large (max. 50 MB).",
"error_user_not_found": "User not found.", "error_user_not_found": "User not found.",
"error_import_already_running": "An import is already running. Please wait for it to finish.", "error_import_already_running": "An import is already running. Please wait for it to finish.",
"error_import_artifact_invalid": "A canonical import file is missing or invalid. Please re-run the normalizer.",
"error_invalid_credentials": "Email address or password is incorrect.", "error_invalid_credentials": "Email address or password is incorrect.",
"error_session_expired": "Your session has expired. Please sign in again.", "error_session_expired": "Your session has expired. Please sign in again.",
"error_session_expired_explainer": "For security reasons, sessions are automatically ended after 8 hours of inactivity.", "error_session_expired_explainer": "For security reasons, sessions are automatically ended after 8 hours of inactivity.",

View File

@@ -14,6 +14,7 @@
"error_file_too_large": "El archivo es demasiado grande (máx. 50 MB).", "error_file_too_large": "El archivo es demasiado grande (máx. 50 MB).",
"error_user_not_found": "Usuario no encontrado.", "error_user_not_found": "Usuario no encontrado.",
"error_import_already_running": "Ya hay una importación en curso. Por favor, espere a que finalice.", "error_import_already_running": "Ya hay una importación en curso. Por favor, espere a que finalice.",
"error_import_artifact_invalid": "Falta un archivo de importación canónico o no es válido. Vuelva a ejecutar el normalizador.",
"error_invalid_credentials": "El correo electrónico o la contraseña son incorrectos.", "error_invalid_credentials": "El correo electrónico o la contraseña son incorrectos.",
"error_session_expired": "Su sesión ha expirado. Por favor, inicie sesión de nuevo.", "error_session_expired": "Su sesión ha expirado. Por favor, inicie sesión de nuevo.",
"error_session_expired_explainer": "Por razones de seguridad, las sesiones se terminan automáticamente tras 8 horas de inactividad.", "error_session_expired_explainer": "Por razones de seguridad, las sesiones se terminan automáticamente tras 8 horas de inactividad.",

View File

@@ -17,6 +17,7 @@ export type ErrorCode =
| 'EMAIL_ALREADY_IN_USE' | 'EMAIL_ALREADY_IN_USE'
| 'WRONG_CURRENT_PASSWORD' | 'WRONG_CURRENT_PASSWORD'
| 'IMPORT_ALREADY_RUNNING' | 'IMPORT_ALREADY_RUNNING'
| 'IMPORT_ARTIFACT_INVALID'
| 'INVALID_RESET_TOKEN' | 'INVALID_RESET_TOKEN'
| 'INVITE_NOT_FOUND' | 'INVITE_NOT_FOUND'
| 'INVITE_EXHAUSTED' | 'INVITE_EXHAUSTED'
@@ -104,6 +105,8 @@ export function getErrorMessage(code: ErrorCode | string | undefined): string {
return m.error_wrong_current_password(); return m.error_wrong_current_password();
case 'IMPORT_ALREADY_RUNNING': case 'IMPORT_ALREADY_RUNNING':
return m.error_import_already_running(); return m.error_import_already_running();
case 'IMPORT_ARTIFACT_INVALID':
return m.error_import_artifact_invalid();
case 'INVALID_RESET_TOKEN': case 'INVALID_RESET_TOKEN':
return m.error_invalid_reset_token(); return m.error_invalid_reset_token();
case 'INVITE_NOT_FOUND': case 'INVITE_NOT_FOUND':