test(importing): verify re-import pruning and provisional precedence on real Postgres

Add a Testcontainers test that re-imports a document with a receiver and a tag
removed from the canonical row and asserts both links are pruned. Add a test that a
register person referenced by a document row is never flipped to provisional,
regardless of re-import, since the orchestrator loads the register/tree before
documents and the monotonic-downward guard prevents a flip. Pin that cross-loader
precedence in a mergeCanonical comment.

Refs #669

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-27 11:02:37 +02:00
parent 7ebf7acd72
commit 5f53c3670f
2 changed files with 48 additions and 1 deletions

View File

@@ -168,7 +168,12 @@ public class PersonService {
if (cmd.personType() != null && existing.getPersonType() == PersonType.PERSON) {
existing.setPersonType(cmd.personType());
}
// provisional is monotonic: once a human confirms a person (false) it never reverts.
// provisional is monotonic-downward: once it is false it never reverts to true.
// This also pins the cross-loader precedence (ADR-025): a register/tree person is
// loaded before documents and already false, so a later document row that references
// the same source_ref (provisional=true) can never flip it provisional — the guard
// below only fires while existing is still provisional. Order of document rows is
// therefore irrelevant.
if (existing.isProvisional()) {
existing.setProvisional(cmd.provisional());
}

View File

@@ -112,6 +112,48 @@ class CanonicalImportIntegrationTest {
assertThat(register).get().extracting(Person::isProvisional).isEqualTo(false);
}
@Test
void reimport_prunesRemovedReceiverAndTag_whenCanonicalRowShrinks() throws Exception {
orchestrator.runImport();
// findById uses the Document.full entity graph so receivers/tags initialise eagerly.
Document before = documentRepository.findById(
documentRepository.findByOriginalFilename("W-0001").orElseThrow().getId()).orElseThrow();
assertThat(before.getReceivers()).isNotEmpty();
assertThat(before.getTags()).isNotEmpty();
// Re-stage the document sheet with W-0001's receiver and tag removed.
writeSheet(artifactDir.resolve("canonical-documents.xlsx"),
List.of("index", "file", "sender_person_id", "sender_name", "receiver_person_ids",
"receiver_names", "date_iso", "date_raw", "date_precision", "date_end", "location", "tags", "summary"),
List.of(
List.of("W-0001", "", "de-gruyter-walter", "Walter de Gruyter",
"", "", "1888-02-15", "15.2.1888", "DAY", "", "Rotterdam", "", "Geschäftsreise"),
List.of("W-0002", "", "de-gruyter-eugenie", "Eugenie de Gruyter",
"de-gruyter-walter", "Walter de Gruyter", "1888-02-16", "16.2.1888", "DAY", "",
"Middelburg", "Themen/Brautbriefe", "Reisepläne")));
orchestrator.runImport();
Document after = documentRepository.findById(before.getId()).orElseThrow();
assertThat(after.getReceivers()).isEmpty();
assertThat(after.getTags()).isEmpty();
}
@Test
void import_neverFlipsRegisterPersonToProvisional_whenReferencedByDocumentRow() {
// de-gruyter-walter is a register person (provisional=false) AND the sender of W-0001.
// The orchestrator loads the register before documents, so the document loader's
// register-first match links the existing person and never mints a provisional one.
// A second run (documents reference the same person again) must not flip it true.
orchestrator.runImport();
orchestrator.runImport();
Person walter = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow();
assertThat(walter.isProvisional()).isFalse();
Person eugenie = personRepository.findBySourceRef("de-gruyter-eugenie").orElseThrow();
assertThat(eugenie.isProvisional()).isFalse();
}
// ─── synthetic-but-real artifact set ─────────────────────────────────────────────
private void writeArtifacts(Path dir) throws Exception {