feat(import): parse generation column in PersonRegisterImporter (#689)
Reads the optional `generation` cell by header name (REQUIRED_HEADERS is not extended — REQ-IMP-001 backward-compat for older artifacts), parses it through GENERATION_PATTERN (^\s*G?\s*(-?\d+)), and routes it into PersonUpsertCommand.generation. Out-of-range values (G 99, G -1) are skip-and-warned, never abort the batch; the post-parse range guard mirrors the V70 CHECK constraint so the DB never sees a value Bean Validation wouldn't accept. Pinned with a parametrised CsvSource covering every shape from the acceptance criteria plus a backward-compat test (artifact without a generation column still imports, all upserts get generation=null). Refs #689 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -11,6 +11,8 @@ import java.io.File;
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Loads {@code canonical-persons.xlsx} (the register) into the person domain via
|
||||
@@ -25,6 +27,15 @@ public class PersonRegisterImporter {
|
||||
|
||||
static final List<String> REQUIRED_HEADERS = List.of("person_id", "last_name", "first_name", "provisional");
|
||||
|
||||
// Matches a leading optional G then a signed integer. Anchored at the
|
||||
// start so noise can't slip in before the number, but tolerant of trailing
|
||||
// commentary cells (e.g. "G 2 de Gruyter") since curated rows sometimes
|
||||
// carry an inline note. Out-of-range values are caught by the post-parse
|
||||
// range guard, not by the regex.
|
||||
private static final Pattern GENERATION_PATTERN = Pattern.compile("^\\s*G?\\s*(-?\\d+)");
|
||||
private static final int GENERATION_MIN = 0;
|
||||
private static final int GENERATION_MAX = 10;
|
||||
|
||||
private final PersonService personService;
|
||||
|
||||
public int load(File artifact) {
|
||||
@@ -49,11 +60,31 @@ public class PersonRegisterImporter {
|
||||
.notes(blankToNull(row.get("notes")))
|
||||
.birthYear(yearOf(row.get("birth_date")))
|
||||
.deathYear(yearOf(row.get("death_date")))
|
||||
.generation(parseGeneration(row.get("generation"), personId))
|
||||
.personType(PersonType.PERSON)
|
||||
.provisional(Boolean.parseBoolean(row.get("provisional")))
|
||||
.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses an optional {@code G n} generation cell. Returns null for blanks,
|
||||
* non-matching strings, and any value outside {@value #GENERATION_MIN}..{@value #GENERATION_MAX}
|
||||
* (mirroring the V70 CHECK). Out-of-range values log a WARN but never abort
|
||||
* the batch — REQ-IMP-001.
|
||||
*/
|
||||
static Integer parseGeneration(String raw, String personId) {
|
||||
if (raw == null || raw.isBlank()) return null;
|
||||
Matcher m = GENERATION_PATTERN.matcher(raw);
|
||||
if (!m.find()) return null;
|
||||
int parsed = Integer.parseInt(m.group(1));
|
||||
if (parsed < GENERATION_MIN || parsed > GENERATION_MAX) {
|
||||
log.warn("Skipping out-of-range generation '{}' for row {}", raw, personId);
|
||||
return null;
|
||||
}
|
||||
log.debug("Parsed generation '{}' for person {}", raw, personId);
|
||||
return parsed;
|
||||
}
|
||||
|
||||
private static Integer yearOf(String isoDate) {
|
||||
if (isoDate == null || isoDate.isBlank()) return null;
|
||||
try {
|
||||
|
||||
@@ -6,6 +6,8 @@ import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.CsvSource;
|
||||
import org.mockito.ArgumentCaptor;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.raddatz.familienarchiv.person.Person;
|
||||
@@ -87,6 +89,50 @@ class PersonRegisterImporterTest {
|
||||
assertThat(processed).isEqualTo(2);
|
||||
}
|
||||
|
||||
// ─── generation parsing (#689) ─────────────────────────────────────────────
|
||||
|
||||
@ParameterizedTest
|
||||
@CsvSource(value = {
|
||||
"'G 3', 3",
|
||||
"'G3', 3",
|
||||
"'G 3', 3",
|
||||
"'3', 3",
|
||||
"' 3 ', 3",
|
||||
"'G 2 de Gruyter', 2",
|
||||
"'', null",
|
||||
"'garbage', null",
|
||||
"'G 99', null",
|
||||
"'G -1', null"
|
||||
}, nullValues = "null")
|
||||
void load_parsesGeneration_perRegex(String raw, Integer expected, @TempDir Path tempDir) throws Exception {
|
||||
PersonService personService = mock(PersonService.class);
|
||||
when(personService.upsertBySourceRef(any())).thenAnswer(inv -> personOf(inv.getArgument(0)));
|
||||
Path xlsx = writePersonsWithGeneration(tempDir,
|
||||
rowWithGeneration("herbert-cram", "Cram", "Herbert", "", "", "False", raw));
|
||||
|
||||
new PersonRegisterImporter(personService).load(xlsx.toFile());
|
||||
|
||||
ArgumentCaptor<PersonUpsertCommand> captor = ArgumentCaptor.forClass(PersonUpsertCommand.class);
|
||||
verify(personService).upsertBySourceRef(captor.capture());
|
||||
assertThat(captor.getValue().generation()).isEqualTo(expected);
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_succeeds_andLeavesGenerationNull_whenArtifactHasNoGenerationColumn(@TempDir Path tempDir) throws Exception {
|
||||
// REQ-IMP-001: older artifacts without the `generation` column must still
|
||||
// import. REQUIRED_HEADERS is intentionally not extended.
|
||||
PersonService personService = mock(PersonService.class);
|
||||
when(personService.upsertBySourceRef(any())).thenAnswer(inv -> personOf(inv.getArgument(0)));
|
||||
Path xlsx = writePersons(tempDir, row(
|
||||
"old-artifact", "Mueller", "Hans", "", "", "False"));
|
||||
|
||||
new PersonRegisterImporter(personService).load(xlsx.toFile());
|
||||
|
||||
ArgumentCaptor<PersonUpsertCommand> captor = ArgumentCaptor.forClass(PersonUpsertCommand.class);
|
||||
verify(personService).upsertBySourceRef(captor.capture());
|
||||
assertThat(captor.getValue().generation()).isNull();
|
||||
}
|
||||
|
||||
private static Person personOf(PersonUpsertCommand cmd) {
|
||||
return Person.builder().id(UUID.randomUUID()).sourceRef(cmd.sourceRef())
|
||||
.firstName(cmd.firstName()).lastName(cmd.lastName())
|
||||
@@ -127,4 +173,36 @@ class PersonRegisterImporterTest {
|
||||
}
|
||||
return xlsx;
|
||||
}
|
||||
|
||||
private Map<String, String> rowWithGeneration(String personId, String lastName, String firstName,
|
||||
String maidenName, String notes, String provisional,
|
||||
String generation) {
|
||||
Map<String, String> r = row(personId, lastName, firstName, maidenName, notes, provisional);
|
||||
r.put("generation", generation);
|
||||
return r;
|
||||
}
|
||||
|
||||
@SafeVarargs
|
||||
private Path writePersonsWithGeneration(Path dir, Map<String, String>... rows) throws Exception {
|
||||
Path xlsx = dir.resolve("canonical-persons.xlsx");
|
||||
List<String> headers = List.of(
|
||||
"person_id", "last_name", "first_name", "maiden_name", "notes", "provisional", "generation");
|
||||
try (XSSFWorkbook wb = new XSSFWorkbook()) {
|
||||
Sheet sheet = wb.createSheet("Sheet1");
|
||||
Row header = sheet.createRow(0);
|
||||
for (int i = 0; i < headers.size(); i++) {
|
||||
header.createCell(i).setCellValue(headers.get(i));
|
||||
}
|
||||
for (int r = 0; r < rows.length; r++) {
|
||||
Row row = sheet.createRow(r + 1);
|
||||
for (int c = 0; c < headers.size(); c++) {
|
||||
row.createCell(c).setCellValue(rows[r].getOrDefault(headers.get(c), ""));
|
||||
}
|
||||
}
|
||||
try (OutputStream out = Files.newOutputStream(xlsx)) {
|
||||
wb.write(out);
|
||||
}
|
||||
}
|
||||
return xlsx;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user