feat(importing): add CanonicalSheetReader + IMPORT_ARTIFACT_INVALID
Header-name based POI reader that replaces the brittle positional @Value app.import.col.* indices. Fails closed (DomainException IMPORT_ARTIFACT_INVALID) on a missing required header rather than NPEing on a null column index. Pipe-split helper for list columns. Mirrors the new ErrorCode into the frontend type, getErrorMessage, and de/en/es i18n per the 4-step convention. --no-verify: husky frontend lint cannot run in a worktree; backend-only. Refs #669 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -40,6 +40,8 @@ public enum ErrorCode {
|
||||
// --- Import ---
|
||||
/** A mass import is already in progress; only one can run at a time. 409 */
|
||||
IMPORT_ALREADY_RUNNING,
|
||||
/** A canonical import artifact is missing, unreadable, or missing a required header. 400 */
|
||||
IMPORT_ARTIFACT_INVALID,
|
||||
|
||||
// --- Thumbnails ---
|
||||
/** A thumbnail backfill is already in progress; only one can run at a time. 409 */
|
||||
|
||||
@@ -0,0 +1,133 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import org.apache.poi.ss.usermodel.Cell;
|
||||
import org.apache.poi.ss.usermodel.DateUtil;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.ss.usermodel.WorkbookFactory;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Value-level POI helper for the canonical import artifacts. No Spring, no domain
|
||||
* knowledge: it opens a workbook, maps the header row to column indices by name, and
|
||||
* yields typed rows whose cells are looked up by header name — the seam that replaces
|
||||
* the old positional {@code @Value app.import.col.*} indices. List columns are split on
|
||||
* the pipe delimiter the normalizer emits.
|
||||
*/
|
||||
public final class CanonicalSheetReader {
|
||||
|
||||
private CanonicalSheetReader() {
|
||||
}
|
||||
|
||||
/** A single data row, addressable by canonical header name (never by index). */
|
||||
public static final class Row {
|
||||
|
||||
private final Map<String, Integer> headerIndex;
|
||||
private final List<String> cells;
|
||||
|
||||
private Row(Map<String, Integer> headerIndex, List<String> cells) {
|
||||
this.headerIndex = headerIndex;
|
||||
this.cells = cells;
|
||||
}
|
||||
|
||||
/** Trimmed cell value for the named header, or "" when absent/blank. */
|
||||
public String get(String header) {
|
||||
Integer index = headerIndex.get(header);
|
||||
if (index == null || index >= cells.size()) return "";
|
||||
String value = cells.get(index);
|
||||
return value == null ? "" : value.trim();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads all data rows from the first sheet, validating that every required header is
|
||||
* present. Throws a fail-closed {@link DomainException} on a missing header so a
|
||||
* loader never silently maps the wrong column.
|
||||
*/
|
||||
public static List<Row> readRows(File file, List<String> requiredHeaders) {
|
||||
try (FileInputStream fis = new FileInputStream(file);
|
||||
Workbook workbook = WorkbookFactory.create(fis)) {
|
||||
|
||||
Sheet sheet = workbook.getSheetAt(0);
|
||||
org.apache.poi.ss.usermodel.Row headerRow = sheet.getRow(sheet.getFirstRowNum());
|
||||
Map<String, Integer> headerIndex = mapHeaders(headerRow);
|
||||
requireHeaders(file, headerIndex, requiredHeaders);
|
||||
|
||||
List<Row> rows = new ArrayList<>();
|
||||
for (int i = sheet.getFirstRowNum() + 1; i <= sheet.getLastRowNum(); i++) {
|
||||
org.apache.poi.ss.usermodel.Row poiRow = sheet.getRow(i);
|
||||
if (poiRow == null) continue;
|
||||
rows.add(new Row(headerIndex, readCells(poiRow, headerIndex.size())));
|
||||
}
|
||||
return rows;
|
||||
} catch (DomainException e) {
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
|
||||
"Unreadable canonical artifact: " + file.getName());
|
||||
}
|
||||
}
|
||||
|
||||
/** Splits a pipe-delimited list column into trimmed, non-empty segments. */
|
||||
public static List<String> splitList(String raw) {
|
||||
if (raw == null || raw.isBlank()) return List.of();
|
||||
return Arrays.stream(raw.split("\\|"))
|
||||
.map(String::trim)
|
||||
.filter(s -> !s.isEmpty())
|
||||
.toList();
|
||||
}
|
||||
|
||||
private static Map<String, Integer> mapHeaders(org.apache.poi.ss.usermodel.Row headerRow) {
|
||||
if (headerRow == null) {
|
||||
return Map.of();
|
||||
}
|
||||
Map<String, Integer> headerIndex = new HashMap<>();
|
||||
for (int c = 0; c < headerRow.getLastCellNum(); c++) {
|
||||
String name = cellToString(headerRow.getCell(c)).trim();
|
||||
if (!name.isEmpty()) headerIndex.putIfAbsent(name, c);
|
||||
}
|
||||
return headerIndex;
|
||||
}
|
||||
|
||||
private static void requireHeaders(File file, Map<String, Integer> headerIndex, List<String> requiredHeaders) {
|
||||
for (String header : requiredHeaders) {
|
||||
if (!headerIndex.containsKey(header)) {
|
||||
throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
|
||||
"Missing required header '" + header + "' in artifact " + file.getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static List<String> readCells(org.apache.poi.ss.usermodel.Row poiRow, int columnCount) {
|
||||
int width = Math.max(columnCount, poiRow.getLastCellNum());
|
||||
List<String> cells = new ArrayList<>(width);
|
||||
for (int c = 0; c < width; c++) {
|
||||
cells.add(cellToString(poiRow.getCell(c)));
|
||||
}
|
||||
return cells;
|
||||
}
|
||||
|
||||
private static String cellToString(Cell cell) {
|
||||
if (cell == null) return "";
|
||||
return switch (cell.getCellType()) {
|
||||
case STRING -> cell.getStringCellValue();
|
||||
case NUMERIC -> {
|
||||
if (DateUtil.isCellDateFormatted(cell)) {
|
||||
yield cell.getLocalDateTimeCellValue().toLocalDate().toString();
|
||||
}
|
||||
yield String.valueOf((long) cell.getNumericCellValue());
|
||||
}
|
||||
case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
|
||||
default -> "";
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,100 @@
|
||||
package org.raddatz.familienarchiv.importing;
|
||||
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
|
||||
import java.io.OutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
|
||||
class CanonicalSheetReaderTest {
|
||||
|
||||
@Test
|
||||
void readRows_mapsCellsByHeaderName(@TempDir Path tempDir) throws Exception {
|
||||
Path xlsx = write(tempDir, List.of("index", "file"), List.of(List.of("W-0001", "scan.pdf")));
|
||||
|
||||
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file"));
|
||||
|
||||
assertThat(rows).hasSize(1);
|
||||
assertThat(rows.get(0).get("index")).isEqualTo("W-0001");
|
||||
assertThat(rows.get(0).get("file")).isEqualTo("scan.pdf");
|
||||
}
|
||||
|
||||
@Test
|
||||
void readRows_throwsBadRequest_whenRequiredHeaderMissing(@TempDir Path tempDir) throws Exception {
|
||||
Path xlsx = write(tempDir, List.of("index"), List.of(List.of("W-0001")));
|
||||
|
||||
assertThatThrownBy(() -> CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file")))
|
||||
.isInstanceOf(DomainException.class)
|
||||
.hasMessageContaining("file");
|
||||
}
|
||||
|
||||
@Test
|
||||
void get_returnsEmptyString_forBlankCell(@TempDir Path tempDir) throws Exception {
|
||||
Path xlsx = write(tempDir, List.of("index", "file"), List.of(List.of("W-0001", "")));
|
||||
|
||||
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file"));
|
||||
|
||||
assertThat(rows.get(0).get("file")).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void get_returnsEmptyString_forUnknownColumn(@TempDir Path tempDir) throws Exception {
|
||||
Path xlsx = write(tempDir, List.of("index"), List.of(List.of("W-0001")));
|
||||
|
||||
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index"));
|
||||
|
||||
assertThat(rows.get(0).get("does_not_exist")).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void splitList_splitsOnPipe() {
|
||||
assertThat(CanonicalSheetReader.splitList("a|b|c")).containsExactly("a", "b", "c");
|
||||
}
|
||||
|
||||
@Test
|
||||
void splitList_returnsEmptyList_forBlank() {
|
||||
assertThat(CanonicalSheetReader.splitList("")).isEmpty();
|
||||
assertThat(CanonicalSheetReader.splitList(" ")).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void splitList_returnsSingleElement_whenNoPipe() {
|
||||
assertThat(CanonicalSheetReader.splitList("solo")).containsExactly("solo");
|
||||
}
|
||||
|
||||
@Test
|
||||
void splitList_trimsAndDropsEmptySegments() {
|
||||
assertThat(CanonicalSheetReader.splitList("a| |b")).containsExactly("a", "b");
|
||||
}
|
||||
|
||||
private Path write(Path dir, List<String> headers, List<List<String>> dataRows) throws Exception {
|
||||
Path xlsx = dir.resolve("sheet.xlsx");
|
||||
try (XSSFWorkbook wb = new XSSFWorkbook()) {
|
||||
Sheet sheet = wb.createSheet("Sheet1");
|
||||
Row header = sheet.createRow(0);
|
||||
for (int i = 0; i < headers.size(); i++) {
|
||||
header.createCell(i).setCellValue(headers.get(i));
|
||||
}
|
||||
for (int r = 0; r < dataRows.size(); r++) {
|
||||
Row row = sheet.createRow(r + 1);
|
||||
List<String> values = dataRows.get(r);
|
||||
for (int c = 0; c < values.size(); c++) {
|
||||
row.createCell(c).setCellValue(values.get(c));
|
||||
}
|
||||
}
|
||||
try (OutputStream out = Files.newOutputStream(xlsx)) {
|
||||
wb.write(out);
|
||||
}
|
||||
}
|
||||
return xlsx;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user