From 9cc682cf72a808fc90b3f5c3daa03f118ad54b3c Mon Sep 17 00:00:00 2001 From: Marcel Date: Wed, 27 May 2026 10:41:08 +0200 Subject: [PATCH] test(importing): Testcontainers idempotency + human-edit-preserve IT Full-stack integration test on real postgres:16-alpine (the UNIQUE(source_ref) + upsert-on-conflict only exist in real Postgres, never H2). Writes a synthetic-but-real four-artifact set, runs the import twice, and asserts person/tag/document counts are identical on re-import (no duplicates), plus the Resolved-decision-#1 precedence: a person field edited in-app survives a re-import. Also asserts register-first sender linkage with raw-text retention and the provisional contract. Fixes a re-import bug the IT surfaced: load() is now @Transactional so an existing document's lazy receivers collection initialises within the session (the previous self-invoked @Transactional on the per-row method never opened a transaction). PersonTreeImporter owns its ObjectMapper rather than depending on the web bean, which is absent in a NONE web environment. Refs #669 Co-Authored-By: Claude Opus 4.7 --- .../importing/DocumentImporter.java | 7 +- .../importing/PersonTreeImporter.java | 7 +- .../CanonicalImportIntegrationTest.java | 170 ++++++++++++++++++ .../importing/PersonTreeImporterTest.java | 9 +- 4 files changed, 184 insertions(+), 9 deletions(-) create mode 100644 backend/src/test/java/org/raddatz/familienarchiv/importing/CanonicalImportIntegrationTest.java diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentImporter.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentImporter.java index fb021d0f..52465413 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentImporter.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentImporter.java @@ -76,6 +76,10 @@ public class DocumentImporter { /** Outcome of loading the document sheet: processed count + per-file skips. */ public record LoadResult(int processed, List skippedFiles) {} + // One transaction for the whole sheet keeps the Hibernate session open so an existing + // document's lazy receivers collection initialises during an idempotent re-import. + // Invoked cross-bean from the orchestrator, so the @Transactional proxy applies. + @Transactional public LoadResult load(File artifact) { List rows = CanonicalSheetReader.readRows(artifact, REQUIRED_HEADERS); int processed = 0; @@ -116,8 +120,7 @@ public class DocumentImporter { return persist(row, index, resolved); } - @Transactional - protected Optional persist(CanonicalSheetReader.Row row, String index, Optional file) { + private Optional persist(CanonicalSheetReader.Row row, String index, Optional file) { Document existing = documentService.findByOriginalFilename(index).orElse(null); if (existing != null && existing.getStatus() != DocumentStatus.PLACEHOLDER) { return Optional.of(ImportStatus.SkipReason.ALREADY_EXISTS); diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/PersonTreeImporter.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/PersonTreeImporter.java index 61392ca2..4cf1f55f 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/importing/PersonTreeImporter.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/PersonTreeImporter.java @@ -34,9 +34,12 @@ import java.util.UUID; @Slf4j public class PersonTreeImporter { + // The tree JSON is a local implementation detail, not a shared API payload, so the + // importer owns its own mapper rather than depending on the web ObjectMapper bean. + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private final PersonService personService; private final RelationshipService relationshipService; - private final ObjectMapper objectMapper; public int load(File artifact) { JsonNode root = readTree(artifact); @@ -49,7 +52,7 @@ public class PersonTreeImporter { private JsonNode readTree(File artifact) { try { - return objectMapper.readTree(artifact); + return OBJECT_MAPPER.readTree(artifact); } catch (Exception e) { throw DomainException.badRequest(ErrorCode.IMPORT_ARTIFACT_INVALID, "Unreadable canonical artifact: " + artifact.getName()); diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/CanonicalImportIntegrationTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/CanonicalImportIntegrationTest.java new file mode 100644 index 00000000..be687928 --- /dev/null +++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/CanonicalImportIntegrationTest.java @@ -0,0 +1,170 @@ +package org.raddatz.familienarchiv.importing; + +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.raddatz.familienarchiv.PostgresContainerConfig; +import org.raddatz.familienarchiv.document.Document; +import org.raddatz.familienarchiv.document.DocumentRepository; +import org.raddatz.familienarchiv.document.DocumentStatus; +import org.raddatz.familienarchiv.person.Person; +import org.raddatz.familienarchiv.person.PersonRepository; +import org.raddatz.familienarchiv.tag.TagRepository; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.context.annotation.Import; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.test.context.bean.override.mockito.MockitoBean; +import org.springframework.test.util.ReflectionTestUtils; +import software.amazon.awssdk.services.s3.S3Client; + +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Optional; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Real Postgres (Testcontainers) integration test for the canonical importer. The + * {@code UNIQUE(source_ref)} constraint and the upsert-on-conflict behaviour only exist + * in real Postgres (never H2), so idempotency is verified here. S3 is mocked — the + * synthetic document rows carry no on-disk files, so every document is a PLACEHOLDER and + * no upload is attempted. + */ +@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE) +@ActiveProfiles("test") +@Import(PostgresContainerConfig.class) +class CanonicalImportIntegrationTest { + + @MockitoBean S3Client s3Client; + + @Autowired CanonicalImportOrchestrator orchestrator; + @Autowired PersonRepository personRepository; + @Autowired TagRepository tagRepository; + @Autowired DocumentRepository documentRepository; + + Path artifactDir; + + @BeforeEach + void setUp() throws Exception { + documentRepository.deleteAll(); + personRepository.deleteAll(); + tagRepository.deleteAll(); + artifactDir = Files.createTempDirectory("canonical-import-it"); + writeArtifacts(artifactDir); + ReflectionTestUtils.setField(orchestrator, "canonicalDir", artifactDir.toString()); + } + + @Test + void reimport_isIdempotent_noDuplicatePersonsTagsOrDocuments() { + orchestrator.runImport(); + long personsAfterFirst = personRepository.count(); + long tagsAfterFirst = tagRepository.count(); + long documentsAfterFirst = documentRepository.count(); + assertThat(orchestrator.getStatus().state()).isEqualTo(ImportStatus.State.DONE); + assertThat(personsAfterFirst).isPositive(); + assertThat(tagsAfterFirst).isPositive(); + assertThat(documentsAfterFirst).isPositive(); + + orchestrator.runImport(); + + assertThat(personRepository.count()).isEqualTo(personsAfterFirst); + assertThat(tagRepository.count()).isEqualTo(tagsAfterFirst); + assertThat(documentRepository.count()).isEqualTo(documentsAfterFirst); + } + + @Test + void reimport_preservesHumanEditedPersonField() { + orchestrator.runImport(); + Person walter = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow(); + walter.setNotes("Verified by archivist"); + walter.setFirstName("Walther"); + personRepository.save(walter); + + orchestrator.runImport(); + + Person reimported = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow(); + assertThat(reimported.getNotes()).isEqualTo("Verified by archivist"); + assertThat(reimported.getFirstName()).isEqualTo("Walther"); + } + + @Test + void import_linksDocumentSenderToRegisterPerson_andRetainsRawText() { + orchestrator.runImport(); + + Person walter = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow(); + Document doc = documentRepository.findByOriginalFilename("W-0001").orElseThrow(); + assertThat(doc.getSender()).isNotNull(); + assertThat(doc.getSender().getId()).isEqualTo(walter.getId()); + assertThat(doc.getSenderText()).isEqualTo("Walter de Gruyter"); + assertThat(doc.getStatus()).isEqualTo(DocumentStatus.PLACEHOLDER); + } + + @Test + void import_provisionalFlag_trueForImporterCreated_falseForRegister() { + orchestrator.runImport(); + + Optional register = personRepository.findBySourceRef("de-gruyter-walter"); + assertThat(register).get().extracting(Person::isProvisional).isEqualTo(false); + } + + // ─── synthetic-but-real artifact set ───────────────────────────────────────────── + + private void writeArtifacts(Path dir) throws Exception { + writeSheet(dir.resolve("canonical-tag-tree.xlsx"), + List.of("tag_path", "parent_name", "tag_name"), + List.of( + List.of("Themen", "", "Themen"), + List.of("Themen/Brautbriefe", "Themen", "Brautbriefe"))); + + writeSheet(dir.resolve("canonical-persons.xlsx"), + List.of("person_id", "last_name", "first_name", "maiden_name", "notes", "birth_date", "death_date", "provisional"), + List.of( + List.of("de-gruyter-walter", "de Gruyter", "Walter", "", "", "1865-01-01", "", "False"), + List.of("de-gruyter-eugenie", "de Gruyter", "Eugenie", "Wöhler", "", "", "", "False"))); + + Files.writeString(dir.resolve("canonical-persons-tree.json"), """ + {"persons":[ + {"rowId":"row_1","firstName":"Walter","lastName":"de Gruyter","familyMember":true,"personId":"de-gruyter-walter"}, + {"rowId":"row_2","firstName":"Eugenie","lastName":"de Gruyter","maidenName":"Wöhler","familyMember":true,"personId":"de-gruyter-eugenie"} + ],"relationships":[ + {"personId":"row_1","relatedPersonId":"row_2","type":"SPOUSE_OF","source":"verheiratet_mit"} + ]} + """); + + writeSheet(dir.resolve("canonical-documents.xlsx"), + List.of("index", "file", "sender_person_id", "sender_name", "receiver_person_ids", + "receiver_names", "date_iso", "date_raw", "date_precision", "date_end", "location", "tags", "summary"), + List.of( + List.of("W-0001", "", "de-gruyter-walter", "Walter de Gruyter", + "de-gruyter-eugenie", "Eugenie de Gruyter", "1888-02-15", "15.2.1888", "DAY", "", + "Rotterdam", "Themen/Brautbriefe", "Geschäftsreise"), + List.of("W-0002", "", "de-gruyter-eugenie", "Eugenie de Gruyter", + "de-gruyter-walter", "Walter de Gruyter", "1888-02-16", "16.2.1888", "DAY", "", + "Middelburg", "Themen/Brautbriefe", "Reisepläne"))); + } + + private void writeSheet(Path file, List headers, List> rows) throws Exception { + try (XSSFWorkbook wb = new XSSFWorkbook()) { + Sheet sheet = wb.createSheet("Sheet1"); + Row header = sheet.createRow(0); + for (int i = 0; i < headers.size(); i++) { + header.createCell(i).setCellValue(headers.get(i)); + } + for (int r = 0; r < rows.size(); r++) { + Row row = sheet.createRow(r + 1); + List values = rows.get(r); + for (int c = 0; c < values.size(); c++) { + row.createCell(c).setCellValue(values.get(c)); + } + } + try (OutputStream out = Files.newOutputStream(file)) { + wb.write(out); + } + } + } +} diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/PersonTreeImporterTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/PersonTreeImporterTest.java index bef9797b..fbd06ba8 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/importing/PersonTreeImporterTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/PersonTreeImporterTest.java @@ -41,7 +41,7 @@ class PersonTreeImporterTest { ],"relationships":[]} """); - new PersonTreeImporter(personService, relationshipService, new com.fasterxml.jackson.databind.ObjectMapper()) + new PersonTreeImporter(personService, relationshipService) .load(json.toFile()); ArgumentCaptor captor = ArgumentCaptor.forClass(PersonUpsertCommand.class); @@ -72,7 +72,7 @@ class PersonTreeImporterTest { ]} """); - new PersonTreeImporter(personService, relationshipService, new com.fasterxml.jackson.databind.ObjectMapper()) + new PersonTreeImporter(personService, relationshipService) .load(json.toFile()); ArgumentCaptor captor = ArgumentCaptor.forClass(CreateRelationshipRequest.class); @@ -98,8 +98,7 @@ class PersonTreeImporterTest { ]} """); - PersonTreeImporter importer = new PersonTreeImporter(personService, relationshipService, - new com.fasterxml.jackson.databind.ObjectMapper()); + PersonTreeImporter importer = new PersonTreeImporter(personService, relationshipService); // Must not propagate the conflict — re-import is idempotent. importer.load(json.toFile()); @@ -120,7 +119,7 @@ class PersonTreeImporterTest { ]} """); - new PersonTreeImporter(personService, relationshipService, new com.fasterxml.jackson.databind.ObjectMapper()) + new PersonTreeImporter(personService, relationshipService) .load(json.toFile()); verify(relationshipService, org.mockito.Mockito.never()).addRelationship(any(), any());