feat(import): parse generation column in PersonRegisterImporter (#689)

Reads the optional `generation` cell by header name (REQUIRED_HEADERS is
not extended — REQ-IMP-001 backward-compat for older artifacts), parses
it through GENERATION_PATTERN (^\s*G?\s*(-?\d+)), and routes it into
PersonUpsertCommand.generation.

Out-of-range values (G 99, G -1) are skip-and-warned, never abort the
batch; the post-parse range guard mirrors the V70 CHECK constraint so
the DB never sees a value Bean Validation wouldn't accept.

Pinned with a parametrised CsvSource covering every shape from the
acceptance criteria plus a backward-compat test (artifact without a
generation column still imports, all upserts get generation=null).

Refs #689

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-28 15:30:31 +02:00
parent dcb5585c64
commit df0037cba2
2 changed files with 109 additions and 0 deletions

View File

@@ -11,6 +11,8 @@ import java.io.File;
import java.time.LocalDate; import java.time.LocalDate;
import java.time.format.DateTimeParseException; import java.time.format.DateTimeParseException;
import java.util.List; import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/** /**
* Loads {@code canonical-persons.xlsx} (the register) into the person domain via * Loads {@code canonical-persons.xlsx} (the register) into the person domain via
@@ -25,6 +27,15 @@ public class PersonRegisterImporter {
static final List<String> REQUIRED_HEADERS = List.of("person_id", "last_name", "first_name", "provisional"); static final List<String> REQUIRED_HEADERS = List.of("person_id", "last_name", "first_name", "provisional");
// Matches a leading optional G then a signed integer. Anchored at the
// start so noise can't slip in before the number, but tolerant of trailing
// commentary cells (e.g. "G 2 de Gruyter") since curated rows sometimes
// carry an inline note. Out-of-range values are caught by the post-parse
// range guard, not by the regex.
private static final Pattern GENERATION_PATTERN = Pattern.compile("^\\s*G?\\s*(-?\\d+)");
private static final int GENERATION_MIN = 0;
private static final int GENERATION_MAX = 10;
private final PersonService personService; private final PersonService personService;
public int load(File artifact) { public int load(File artifact) {
@@ -49,11 +60,31 @@ public class PersonRegisterImporter {
.notes(blankToNull(row.get("notes"))) .notes(blankToNull(row.get("notes")))
.birthYear(yearOf(row.get("birth_date"))) .birthYear(yearOf(row.get("birth_date")))
.deathYear(yearOf(row.get("death_date"))) .deathYear(yearOf(row.get("death_date")))
.generation(parseGeneration(row.get("generation"), personId))
.personType(PersonType.PERSON) .personType(PersonType.PERSON)
.provisional(Boolean.parseBoolean(row.get("provisional"))) .provisional(Boolean.parseBoolean(row.get("provisional")))
.build(); .build();
} }
/**
* Parses an optional {@code G n} generation cell. Returns null for blanks,
* non-matching strings, and any value outside {@value #GENERATION_MIN}..{@value #GENERATION_MAX}
* (mirroring the V70 CHECK). Out-of-range values log a WARN but never abort
* the batch — REQ-IMP-001.
*/
static Integer parseGeneration(String raw, String personId) {
if (raw == null || raw.isBlank()) return null;
Matcher m = GENERATION_PATTERN.matcher(raw);
if (!m.find()) return null;
int parsed = Integer.parseInt(m.group(1));
if (parsed < GENERATION_MIN || parsed > GENERATION_MAX) {
log.warn("Skipping out-of-range generation '{}' for row {}", raw, personId);
return null;
}
log.debug("Parsed generation '{}' for person {}", raw, personId);
return parsed;
}
private static Integer yearOf(String isoDate) { private static Integer yearOf(String isoDate) {
if (isoDate == null || isoDate.isBlank()) return null; if (isoDate == null || isoDate.isBlank()) return null;
try { try {

View File

@@ -6,6 +6,8 @@ import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.api.io.TempDir;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;
import org.mockito.ArgumentCaptor; import org.mockito.ArgumentCaptor;
import org.mockito.junit.jupiter.MockitoExtension; import org.mockito.junit.jupiter.MockitoExtension;
import org.raddatz.familienarchiv.person.Person; import org.raddatz.familienarchiv.person.Person;
@@ -87,6 +89,50 @@ class PersonRegisterImporterTest {
assertThat(processed).isEqualTo(2); assertThat(processed).isEqualTo(2);
} }
// ─── generation parsing (#689) ─────────────────────────────────────────────
@ParameterizedTest
@CsvSource(value = {
"'G 3', 3",
"'G3', 3",
"'G 3', 3",
"'3', 3",
"' 3 ', 3",
"'G 2 de Gruyter', 2",
"'', null",
"'garbage', null",
"'G 99', null",
"'G -1', null"
}, nullValues = "null")
void load_parsesGeneration_perRegex(String raw, Integer expected, @TempDir Path tempDir) throws Exception {
PersonService personService = mock(PersonService.class);
when(personService.upsertBySourceRef(any())).thenAnswer(inv -> personOf(inv.getArgument(0)));
Path xlsx = writePersonsWithGeneration(tempDir,
rowWithGeneration("herbert-cram", "Cram", "Herbert", "", "", "False", raw));
new PersonRegisterImporter(personService).load(xlsx.toFile());
ArgumentCaptor<PersonUpsertCommand> captor = ArgumentCaptor.forClass(PersonUpsertCommand.class);
verify(personService).upsertBySourceRef(captor.capture());
assertThat(captor.getValue().generation()).isEqualTo(expected);
}
@Test
void load_succeeds_andLeavesGenerationNull_whenArtifactHasNoGenerationColumn(@TempDir Path tempDir) throws Exception {
// REQ-IMP-001: older artifacts without the `generation` column must still
// import. REQUIRED_HEADERS is intentionally not extended.
PersonService personService = mock(PersonService.class);
when(personService.upsertBySourceRef(any())).thenAnswer(inv -> personOf(inv.getArgument(0)));
Path xlsx = writePersons(tempDir, row(
"old-artifact", "Mueller", "Hans", "", "", "False"));
new PersonRegisterImporter(personService).load(xlsx.toFile());
ArgumentCaptor<PersonUpsertCommand> captor = ArgumentCaptor.forClass(PersonUpsertCommand.class);
verify(personService).upsertBySourceRef(captor.capture());
assertThat(captor.getValue().generation()).isNull();
}
private static Person personOf(PersonUpsertCommand cmd) { private static Person personOf(PersonUpsertCommand cmd) {
return Person.builder().id(UUID.randomUUID()).sourceRef(cmd.sourceRef()) return Person.builder().id(UUID.randomUUID()).sourceRef(cmd.sourceRef())
.firstName(cmd.firstName()).lastName(cmd.lastName()) .firstName(cmd.firstName()).lastName(cmd.lastName())
@@ -127,4 +173,36 @@ class PersonRegisterImporterTest {
} }
return xlsx; return xlsx;
} }
private Map<String, String> rowWithGeneration(String personId, String lastName, String firstName,
String maidenName, String notes, String provisional,
String generation) {
Map<String, String> r = row(personId, lastName, firstName, maidenName, notes, provisional);
r.put("generation", generation);
return r;
}
@SafeVarargs
private Path writePersonsWithGeneration(Path dir, Map<String, String>... rows) throws Exception {
Path xlsx = dir.resolve("canonical-persons.xlsx");
List<String> headers = List.of(
"person_id", "last_name", "first_name", "maiden_name", "notes", "provisional", "generation");
try (XSSFWorkbook wb = new XSSFWorkbook()) {
Sheet sheet = wb.createSheet("Sheet1");
Row header = sheet.createRow(0);
for (int i = 0; i < headers.size(); i++) {
header.createCell(i).setCellValue(headers.get(i));
}
for (int r = 0; r < rows.length; r++) {
Row row = sheet.createRow(r + 1);
for (int c = 0; c < headers.size(); c++) {
row.createCell(c).setCellValue(rows[r].getOrDefault(headers.get(c), ""));
}
}
try (OutputStream out = Files.newOutputStream(xlsx)) {
wb.write(out);
}
}
return xlsx;
}
} }