test(importing): Testcontainers idempotency + human-edit-preserve IT

Full-stack integration test on real postgres:16-alpine (the UNIQUE(source_ref)
+ upsert-on-conflict only exist in real Postgres, never H2). Writes a
synthetic-but-real four-artifact set, runs the import twice, and asserts
person/tag/document counts are identical on re-import (no duplicates), plus
the Resolved-decision-#1 precedence: a person field edited in-app survives a
re-import. Also asserts register-first sender linkage with raw-text retention
and the provisional contract.

Fixes a re-import bug the IT surfaced: load() is now @Transactional so an
existing document's lazy receivers collection initialises within the session
(the previous self-invoked @Transactional on the per-row method never opened
a transaction). PersonTreeImporter owns its ObjectMapper rather than
depending on the web bean, which is absent in a NONE web environment.

Refs #669

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-27 10:41:08 +02:00
parent 459ba14207
commit 9cc682cf72
4 changed files with 184 additions and 9 deletions

View File

@@ -76,6 +76,10 @@ public class DocumentImporter {
/** Outcome of loading the document sheet: processed count + per-file skips. */
public record LoadResult(int processed, List<ImportStatus.SkippedFile> skippedFiles) {}
// One transaction for the whole sheet keeps the Hibernate session open so an existing
// document's lazy receivers collection initialises during an idempotent re-import.
// Invoked cross-bean from the orchestrator, so the @Transactional proxy applies.
@Transactional
public LoadResult load(File artifact) {
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(artifact, REQUIRED_HEADERS);
int processed = 0;
@@ -116,8 +120,7 @@ public class DocumentImporter {
return persist(row, index, resolved);
}
@Transactional
protected Optional<ImportStatus.SkipReason> persist(CanonicalSheetReader.Row row, String index, Optional<File> file) {
private Optional<ImportStatus.SkipReason> persist(CanonicalSheetReader.Row row, String index, Optional<File> file) {
Document existing = documentService.findByOriginalFilename(index).orElse(null);
if (existing != null && existing.getStatus() != DocumentStatus.PLACEHOLDER) {
return Optional.of(ImportStatus.SkipReason.ALREADY_EXISTS);

View File

@@ -34,9 +34,12 @@ import java.util.UUID;
@Slf4j
public class PersonTreeImporter {
// The tree JSON is a local implementation detail, not a shared API payload, so the
// importer owns its own mapper rather than depending on the web ObjectMapper bean.
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private final PersonService personService;
private final RelationshipService relationshipService;
private final ObjectMapper objectMapper;
public int load(File artifact) {
JsonNode root = readTree(artifact);
@@ -49,7 +52,7 @@ public class PersonTreeImporter {
private JsonNode readTree(File artifact) {
try {
return objectMapper.readTree(artifact);
return OBJECT_MAPPER.readTree(artifact);
} catch (Exception e) {
throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID,
"Unreadable canonical artifact: " + artifact.getName());

View File

@@ -0,0 +1,170 @@
package org.raddatz.familienarchiv.importing;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.raddatz.familienarchiv.PostgresContainerConfig;
import org.raddatz.familienarchiv.document.Document;
import org.raddatz.familienarchiv.document.DocumentRepository;
import org.raddatz.familienarchiv.document.DocumentStatus;
import org.raddatz.familienarchiv.person.Person;
import org.raddatz.familienarchiv.person.PersonRepository;
import org.raddatz.familienarchiv.tag.TagRepository;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.context.annotation.Import;
import org.springframework.test.context.ActiveProfiles;
import org.springframework.test.context.bean.override.mockito.MockitoBean;
import org.springframework.test.util.ReflectionTestUtils;
import software.amazon.awssdk.services.s3.S3Client;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Optional;
import static org.assertj.core.api.Assertions.assertThat;
/**
* Real Postgres (Testcontainers) integration test for the canonical importer. The
* {@code UNIQUE(source_ref)} constraint and the upsert-on-conflict behaviour only exist
* in real Postgres (never H2), so idempotency is verified here. S3 is mocked — the
* synthetic document rows carry no on-disk files, so every document is a PLACEHOLDER and
* no upload is attempted.
*/
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
@ActiveProfiles("test")
@Import(PostgresContainerConfig.class)
class CanonicalImportIntegrationTest {
@MockitoBean S3Client s3Client;
@Autowired CanonicalImportOrchestrator orchestrator;
@Autowired PersonRepository personRepository;
@Autowired TagRepository tagRepository;
@Autowired DocumentRepository documentRepository;
Path artifactDir;
@BeforeEach
void setUp() throws Exception {
documentRepository.deleteAll();
personRepository.deleteAll();
tagRepository.deleteAll();
artifactDir = Files.createTempDirectory("canonical-import-it");
writeArtifacts(artifactDir);
ReflectionTestUtils.setField(orchestrator, "canonicalDir", artifactDir.toString());
}
@Test
void reimport_isIdempotent_noDuplicatePersonsTagsOrDocuments() {
orchestrator.runImport();
long personsAfterFirst = personRepository.count();
long tagsAfterFirst = tagRepository.count();
long documentsAfterFirst = documentRepository.count();
assertThat(orchestrator.getStatus().state()).isEqualTo(ImportStatus.State.DONE);
assertThat(personsAfterFirst).isPositive();
assertThat(tagsAfterFirst).isPositive();
assertThat(documentsAfterFirst).isPositive();
orchestrator.runImport();
assertThat(personRepository.count()).isEqualTo(personsAfterFirst);
assertThat(tagRepository.count()).isEqualTo(tagsAfterFirst);
assertThat(documentRepository.count()).isEqualTo(documentsAfterFirst);
}
@Test
void reimport_preservesHumanEditedPersonField() {
orchestrator.runImport();
Person walter = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow();
walter.setNotes("Verified by archivist");
walter.setFirstName("Walther");
personRepository.save(walter);
orchestrator.runImport();
Person reimported = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow();
assertThat(reimported.getNotes()).isEqualTo("Verified by archivist");
assertThat(reimported.getFirstName()).isEqualTo("Walther");
}
@Test
void import_linksDocumentSenderToRegisterPerson_andRetainsRawText() {
orchestrator.runImport();
Person walter = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow();
Document doc = documentRepository.findByOriginalFilename("W-0001").orElseThrow();
assertThat(doc.getSender()).isNotNull();
assertThat(doc.getSender().getId()).isEqualTo(walter.getId());
assertThat(doc.getSenderText()).isEqualTo("Walter de Gruyter");
assertThat(doc.getStatus()).isEqualTo(DocumentStatus.PLACEHOLDER);
}
@Test
void import_provisionalFlag_trueForImporterCreated_falseForRegister() {
orchestrator.runImport();
Optional<Person> register = personRepository.findBySourceRef("de-gruyter-walter");
assertThat(register).get().extracting(Person::isProvisional).isEqualTo(false);
}
// ─── synthetic-but-real artifact set ─────────────────────────────────────────────
private void writeArtifacts(Path dir) throws Exception {
writeSheet(dir.resolve("canonical-tag-tree.xlsx"),
List.of("tag_path", "parent_name", "tag_name"),
List.of(
List.of("Themen", "", "Themen"),
List.of("Themen/Brautbriefe", "Themen", "Brautbriefe")));
writeSheet(dir.resolve("canonical-persons.xlsx"),
List.of("person_id", "last_name", "first_name", "maiden_name", "notes", "birth_date", "death_date", "provisional"),
List.of(
List.of("de-gruyter-walter", "de Gruyter", "Walter", "", "", "1865-01-01", "", "False"),
List.of("de-gruyter-eugenie", "de Gruyter", "Eugenie", "Wöhler", "", "", "", "False")));
Files.writeString(dir.resolve("canonical-persons-tree.json"), """
{"persons":[
{"rowId":"row_1","firstName":"Walter","lastName":"de Gruyter","familyMember":true,"personId":"de-gruyter-walter"},
{"rowId":"row_2","firstName":"Eugenie","lastName":"de Gruyter","maidenName":"Wöhler","familyMember":true,"personId":"de-gruyter-eugenie"}
],"relationships":[
{"personId":"row_1","relatedPersonId":"row_2","type":"SPOUSE_OF","source":"verheiratet_mit"}
]}
""");
writeSheet(dir.resolve("canonical-documents.xlsx"),
List.of("index", "file", "sender_person_id", "sender_name", "receiver_person_ids",
"receiver_names", "date_iso", "date_raw", "date_precision", "date_end", "location", "tags", "summary"),
List.of(
List.of("W-0001", "", "de-gruyter-walter", "Walter de Gruyter",
"de-gruyter-eugenie", "Eugenie de Gruyter", "1888-02-15", "15.2.1888", "DAY", "",
"Rotterdam", "Themen/Brautbriefe", "Geschäftsreise"),
List.of("W-0002", "", "de-gruyter-eugenie", "Eugenie de Gruyter",
"de-gruyter-walter", "Walter de Gruyter", "1888-02-16", "16.2.1888", "DAY", "",
"Middelburg", "Themen/Brautbriefe", "Reisepläne")));
}
private void writeSheet(Path file, List<String> headers, List<List<String>> rows) throws Exception {
try (XSSFWorkbook wb = new XSSFWorkbook()) {
Sheet sheet = wb.createSheet("Sheet1");
Row header = sheet.createRow(0);
for (int i = 0; i < headers.size(); i++) {
header.createCell(i).setCellValue(headers.get(i));
}
for (int r = 0; r < rows.size(); r++) {
Row row = sheet.createRow(r + 1);
List<String> values = rows.get(r);
for (int c = 0; c < values.size(); c++) {
row.createCell(c).setCellValue(values.get(c));
}
}
try (OutputStream out = Files.newOutputStream(file)) {
wb.write(out);
}
}
}
}

View File

@@ -41,7 +41,7 @@ class PersonTreeImporterTest {
],"relationships":[]}
""");
new PersonTreeImporter(personService, relationshipService, new com.fasterxml.jackson.databind.ObjectMapper())
new PersonTreeImporter(personService, relationshipService)
.load(json.toFile());
ArgumentCaptor<PersonUpsertCommand> captor = ArgumentCaptor.forClass(PersonUpsertCommand.class);
@@ -72,7 +72,7 @@ class PersonTreeImporterTest {
]}
""");
new PersonTreeImporter(personService, relationshipService, new com.fasterxml.jackson.databind.ObjectMapper())
new PersonTreeImporter(personService, relationshipService)
.load(json.toFile());
ArgumentCaptor<CreateRelationshipRequest> captor = ArgumentCaptor.forClass(CreateRelationshipRequest.class);
@@ -98,8 +98,7 @@ class PersonTreeImporterTest {
]}
""");
PersonTreeImporter importer = new PersonTreeImporter(personService, relationshipService,
new com.fasterxml.jackson.databind.ObjectMapper());
PersonTreeImporter importer = new PersonTreeImporter(personService, relationshipService);
// Must not propagate the conflict — re-import is idempotent.
importer.load(json.toFile());
@@ -120,7 +119,7 @@ class PersonTreeImporterTest {
]}
""");
new PersonTreeImporter(personService, relationshipService, new com.fasterxml.jackson.databind.ObjectMapper())
new PersonTreeImporter(personService, relationshipService)
.load(json.toFile());
verify(relationshipService, org.mockito.Mockito.never()).addRelationship(any(), any());