Compare commits
6 Commits
5cf8fd149e
...
fc53e777d5
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fc53e777d5 | ||
|
|
4fa2b83c0d | ||
|
|
e9ddaed76a | ||
|
|
5f53c3670f | ||
|
|
7ebf7acd72 | ||
|
|
2f7ea37466 |
@@ -165,6 +165,10 @@ public class DocumentImporter {
|
||||
doc.setContentType(contentType);
|
||||
doc.setSender(sender);
|
||||
doc.setSenderText(blankToNull(senderName));
|
||||
// The canonical row is authoritative for receivers/tags (ADR-025): clear then
|
||||
// re-populate so a shrunk set on re-import prunes stale links rather than
|
||||
// accumulating them. The raw sender_text/receiver_text retention is separate.
|
||||
doc.getReceivers().clear();
|
||||
doc.getReceivers().addAll(receivers);
|
||||
doc.setReceiverText(blankToNull(receiverNames));
|
||||
doc.setDocumentDate(parseIsoDate(row.get("date_iso")));
|
||||
@@ -203,7 +207,10 @@ public class DocumentImporter {
|
||||
.build()));
|
||||
}
|
||||
|
||||
// Authoritative: the canonical row defines the document's tags exactly. Clearing first
|
||||
// means a tag removed from the row is pruned on re-import (ADR-025).
|
||||
private void attachTag(Document doc, String tagPath) {
|
||||
doc.getTags().clear();
|
||||
if (tagPath.isBlank()) return;
|
||||
tagService.findBySourceRef(tagPath).ifPresent(tag -> doc.getTags().add(tag));
|
||||
}
|
||||
|
||||
@@ -88,6 +88,9 @@ public class PersonTreeImporter {
|
||||
private int createRelationships(JsonNode relationships, Map<String, UUID> idByRowId) {
|
||||
int created = 0;
|
||||
for (JsonNode node : relationships) {
|
||||
// Trap: a relationship node's personId / relatedPersonId fields carry the tree's
|
||||
// local rowId (e.g. "row_a"), NOT a person slug. They are resolved through
|
||||
// idByRowId to the upserted person's UUID.
|
||||
UUID person = idByRowId.get(text(node, "personId"));
|
||||
UUID related = idByRowId.get(text(node, "relatedPersonId"));
|
||||
if (person == null || related == null) {
|
||||
|
||||
@@ -163,22 +163,33 @@ public class PersonService {
|
||||
existing.setFirstName(preferHuman(existing.getFirstName(), cmd.firstName()));
|
||||
existing.setLastName(preferHuman(existing.getLastName(), cmd.lastName()));
|
||||
existing.setNotes(preferHuman(existing.getNotes(), cmd.notes()));
|
||||
if (existing.getBirthYear() == null) existing.setBirthYear(cmd.birthYear());
|
||||
if (existing.getDeathYear() == null) existing.setDeathYear(cmd.deathYear());
|
||||
existing.setBirthYear(preferHuman(existing.getBirthYear(), cmd.birthYear()));
|
||||
existing.setDeathYear(preferHuman(existing.getDeathYear(), cmd.deathYear()));
|
||||
if (cmd.personType() != null && existing.getPersonType() == PersonType.PERSON) {
|
||||
existing.setPersonType(cmd.personType());
|
||||
}
|
||||
// provisional is monotonic: once a human confirms a person (false) it never reverts.
|
||||
// provisional is monotonic-downward: once it is false it never reverts to true.
|
||||
// This also pins the cross-loader precedence (ADR-025): a register/tree person is
|
||||
// loaded before documents and already false, so a later document row that references
|
||||
// the same source_ref (provisional=true) can never flip it provisional — the guard
|
||||
// below only fires while existing is still provisional. Order of document rows is
|
||||
// therefore irrelevant.
|
||||
if (existing.isProvisional()) {
|
||||
existing.setProvisional(cmd.provisional());
|
||||
}
|
||||
return existing;
|
||||
}
|
||||
|
||||
// preferHuman keeps an existing human-entered value and only falls back to the canonical
|
||||
// value when the existing one is absent — the single idiom for every fill-blank field.
|
||||
private static String preferHuman(String existing, String canonical) {
|
||||
return (existing == null || existing.isBlank()) ? blankToNull(canonical) : existing;
|
||||
}
|
||||
|
||||
private static Integer preferHuman(Integer existing, Integer canonical) {
|
||||
return existing != null ? existing : canonical;
|
||||
}
|
||||
|
||||
private static String blankToNull(String s) {
|
||||
return (s == null || s.isBlank()) ? null : s.trim();
|
||||
}
|
||||
|
||||
@@ -112,6 +112,48 @@ class CanonicalImportIntegrationTest {
|
||||
assertThat(register).get().extracting(Person::isProvisional).isEqualTo(false);
|
||||
}
|
||||
|
||||
@Test
|
||||
void reimport_prunesRemovedReceiverAndTag_whenCanonicalRowShrinks() throws Exception {
|
||||
orchestrator.runImport();
|
||||
// findById uses the Document.full entity graph so receivers/tags initialise eagerly.
|
||||
Document before = documentRepository.findById(
|
||||
documentRepository.findByOriginalFilename("W-0001").orElseThrow().getId()).orElseThrow();
|
||||
assertThat(before.getReceivers()).isNotEmpty();
|
||||
assertThat(before.getTags()).isNotEmpty();
|
||||
|
||||
// Re-stage the document sheet with W-0001's receiver and tag removed.
|
||||
writeSheet(artifactDir.resolve("canonical-documents.xlsx"),
|
||||
List.of("index", "file", "sender_person_id", "sender_name", "receiver_person_ids",
|
||||
"receiver_names", "date_iso", "date_raw", "date_precision", "date_end", "location", "tags", "summary"),
|
||||
List.of(
|
||||
List.of("W-0001", "", "de-gruyter-walter", "Walter de Gruyter",
|
||||
"", "", "1888-02-15", "15.2.1888", "DAY", "", "Rotterdam", "", "Geschäftsreise"),
|
||||
List.of("W-0002", "", "de-gruyter-eugenie", "Eugenie de Gruyter",
|
||||
"de-gruyter-walter", "Walter de Gruyter", "1888-02-16", "16.2.1888", "DAY", "",
|
||||
"Middelburg", "Themen/Brautbriefe", "Reisepläne")));
|
||||
|
||||
orchestrator.runImport();
|
||||
|
||||
Document after = documentRepository.findById(before.getId()).orElseThrow();
|
||||
assertThat(after.getReceivers()).isEmpty();
|
||||
assertThat(after.getTags()).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void import_neverFlipsRegisterPersonToProvisional_whenReferencedByDocumentRow() {
|
||||
// de-gruyter-walter is a register person (provisional=false) AND the sender of W-0001.
|
||||
// The orchestrator loads the register before documents, so the document loader's
|
||||
// register-first match links the existing person and never mints a provisional one.
|
||||
// A second run (documents reference the same person again) must not flip it true.
|
||||
orchestrator.runImport();
|
||||
orchestrator.runImport();
|
||||
|
||||
Person walter = personRepository.findBySourceRef("de-gruyter-walter").orElseThrow();
|
||||
assertThat(walter.isProvisional()).isFalse();
|
||||
Person eugenie = personRepository.findBySourceRef("de-gruyter-eugenie").orElseThrow();
|
||||
assertThat(eugenie.isProvisional()).isFalse();
|
||||
}
|
||||
|
||||
// ─── synthetic-but-real artifact set ─────────────────────────────────────────────
|
||||
|
||||
private void writeArtifacts(Path dir) throws Exception {
|
||||
|
||||
@@ -55,6 +55,21 @@ class CanonicalSheetReaderTest {
|
||||
assertThat(rows.get(0).get("does_not_exist")).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void get_returnsEmptyString_forTrailingColumns_whenRowShorterThanHeader(@TempDir Path tempDir) throws Exception {
|
||||
// POI omits trailing empty cells, so a real-world artifact row can be narrower than
|
||||
// the header. The missing columns must read as "" rather than throwing.
|
||||
Path xlsx = write(tempDir,
|
||||
List.of("index", "file", "summary"),
|
||||
List.of(List.of("W-0001")));
|
||||
|
||||
List<CanonicalSheetReader.Row> rows = CanonicalSheetReader.readRows(xlsx.toFile(), List.of("index", "file", "summary"));
|
||||
|
||||
assertThat(rows.get(0).get("index")).isEqualTo("W-0001");
|
||||
assertThat(rows.get(0).get("file")).isEmpty();
|
||||
assertThat(rows.get(0).get("summary")).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void splitList_splitsOnPipe() {
|
||||
assertThat(CanonicalSheetReader.splitList("a|b|c")).containsExactly("a", "b", "c");
|
||||
|
||||
@@ -382,6 +382,28 @@ class DocumentImporterTest {
|
||||
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d -> d.getId().equals(existing.getId())));
|
||||
}
|
||||
|
||||
// ─── canonical collections are authoritative — re-import prunes removed links ──────
|
||||
|
||||
@Test
|
||||
void load_prunesReceiversAndTags_whenCanonicalRowShrinks(@TempDir Path tempDir) throws Exception {
|
||||
ReflectionTestUtils.setField(importer, "importDir", tempDir.toString());
|
||||
Person staleReceiver = Person.builder().id(UUID.randomUUID()).sourceRef("stale-receiver").lastName("Stale").build();
|
||||
Tag staleTag = Tag.builder().id(UUID.randomUUID()).name("Stale").sourceRef("Themen/Stale").build();
|
||||
Document existing = Document.builder().id(UUID.randomUUID())
|
||||
.originalFilename("W-0008").status(DocumentStatus.PLACEHOLDER).build();
|
||||
existing.getReceivers().add(staleReceiver);
|
||||
existing.getTags().add(staleTag);
|
||||
when(documentService.findByOriginalFilename("W-0008")).thenReturn(Optional.of(existing));
|
||||
when(documentService.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
// The canonical row now carries no receiver and no tag: both stale links must go.
|
||||
Path xlsx = writeDocs(tempDir, docRow("W-0008", "", "", "", "", "", "", "", "", ""));
|
||||
|
||||
importer.load(xlsx.toFile());
|
||||
|
||||
verify(documentService).save(org.mockito.ArgumentMatchers.argThat(d ->
|
||||
d.getReceivers().isEmpty() && d.getTags().isEmpty()));
|
||||
}
|
||||
|
||||
// ─── helpers ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
private Map<String, String> docRow(String index, String file, String senderId, String senderName,
|
||||
|
||||
@@ -19,6 +19,7 @@ import java.nio.file.Path;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.eq;
|
||||
import static org.mockito.Mockito.doThrow;
|
||||
@@ -106,6 +107,31 @@ class PersonTreeImporterTest {
|
||||
verify(relationshipService).addRelationship(any(), any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_propagatesUnexpectedDomainException_fromAddRelationship(@TempDir Path tempDir) throws Exception {
|
||||
PersonService personService = mock(PersonService.class);
|
||||
RelationshipService relationshipService = mock(RelationshipService.class);
|
||||
when(personService.upsertBySourceRef(any()))
|
||||
.thenAnswer(inv -> personOf(inv.getArgument(0)));
|
||||
// An unexpected ErrorCode (not DUPLICATE/CIRCULAR) must NOT be swallowed.
|
||||
doThrow(DomainException.internal(ErrorCode.INTERNAL_ERROR, "boom"))
|
||||
.when(relationshipService).addRelationship(any(), any());
|
||||
Path json = write(tempDir, """
|
||||
{"persons":[
|
||||
{"rowId":"row_a","lastName":"A","familyMember":true,"personId":"a"},
|
||||
{"rowId":"row_b","lastName":"B","familyMember":true,"personId":"b"}
|
||||
],"relationships":[
|
||||
{"personId":"row_a","relatedPersonId":"row_b","type":"SPOUSE_OF","source":"verheiratet_mit"}
|
||||
]}
|
||||
""");
|
||||
|
||||
PersonTreeImporter importer = new PersonTreeImporter(personService, relationshipService);
|
||||
|
||||
assertThatThrownBy(() -> importer.load(json.toFile()))
|
||||
.isInstanceOf(DomainException.class)
|
||||
.extracting("code").isEqualTo(ErrorCode.INTERNAL_ERROR);
|
||||
}
|
||||
|
||||
@Test
|
||||
void load_skipsRelationship_whenRowIdUnresolved(@TempDir Path tempDir) throws Exception {
|
||||
PersonService personService = mock(PersonService.class);
|
||||
|
||||
@@ -97,6 +97,26 @@ class PersonImportUpsertTest {
|
||||
assertThat(result.getNotes()).isEqualTo("Nichte von Herbert");
|
||||
}
|
||||
|
||||
@Test
|
||||
void upsertBySourceRef_fillsBlankYears_butPreservesHumanEditedYears_onReimport() {
|
||||
// Existing has a human-set birthYear and a blank deathYear.
|
||||
Person existing = Person.builder()
|
||||
.id(UUID.randomUUID()).sourceRef("clara-cram")
|
||||
.lastName("Cram").birthYear(1890).deathYear(null).build();
|
||||
when(personRepository.findBySourceRef("clara-cram")).thenReturn(Optional.of(existing));
|
||||
when(personRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
PersonUpsertCommand cmd = PersonUpsertCommand.builder()
|
||||
.sourceRef("clara-cram").lastName("Cram")
|
||||
.birthYear(1888).deathYear(1965)
|
||||
.personType(PersonType.PERSON).provisional(false).build();
|
||||
|
||||
Person result = personService.upsertBySourceRef(cmd);
|
||||
|
||||
assertThat(result.getBirthYear()).isEqualTo(1890); // human value kept
|
||||
assertThat(result.getDeathYear()).isEqualTo(1965); // blank filled from canonical
|
||||
}
|
||||
|
||||
@Test
|
||||
void upsertBySourceRef_neverFlipsProvisionalBackToTrue_onceHumanConfirmed() {
|
||||
// A human confirmed this provisional importer-created person (provisional -> false).
|
||||
|
||||
@@ -572,7 +572,8 @@ maps the clean columns by header name. See [ADR-025](adr/025-canonical-import-an
|
||||
|
||||
```bash
|
||||
cd tools/import-normalizer
|
||||
python -m normalizer # or the documented normalizer entrypoint
|
||||
python3 -m venv .venv && .venv/bin/pip install -r requirements.txt # once, on a fresh clone
|
||||
.venv/bin/python normalize.py
|
||||
# writes the four canonical artifacts into ./out/
|
||||
```
|
||||
|
||||
|
||||
@@ -72,11 +72,26 @@ loader uses `RelationshipService`, never the relationship repository.
|
||||
|
||||
Settled sub-decisions:
|
||||
|
||||
- **Idempotency precedence = preserve human edits.** Persons/tags upsert by `source_ref`,
|
||||
documents by `index`. On re-import a non-blank field a human changed in-app is never
|
||||
overwritten (blank fields are filled from canonical), and `provisional` is monotonic — once
|
||||
a human confirms a person (`false`) it never reverts to `true`. Verified against real
|
||||
Postgres in `CanonicalImportIntegrationTest`.
|
||||
- **Idempotency precedence is domain-specific.** Persons/tags upsert by `source_ref`,
|
||||
documents by `index`. Two distinct rules apply:
|
||||
- **Person/Tag scalar fields = preserve human edits.** On re-import a non-blank field a human
|
||||
changed in-app is never overwritten (blank fields are filled from canonical via the single
|
||||
`preferHuman` idiom), and `provisional` is monotonic-downward — once a human confirms a
|
||||
person (`false`) it never reverts to `true`. Because the orchestrator loads the register and
|
||||
tree *before* documents, a person already `false` can never be flipped provisional by a
|
||||
later document row that references the same `source_ref`, regardless of document-row order.
|
||||
- **Document sender/receivers/tags = canonical-authoritative.** A document's sender, receiver
|
||||
set, and tag set are owned by the canonical row, not the archivist. On re-import of a
|
||||
PLACEHOLDER document `DocumentImporter` clears and re-populates `receivers`/`tags` so a row
|
||||
whose set *shrinks* prunes the removed links rather than accumulating stale ones. The
|
||||
"preserve human edits" rule above does **not** extend to these collections. The raw
|
||||
`sender_text`/`receiver_text` cells are always retained verbatim (a separate invariant).
|
||||
Note non-PLACEHOLDER documents are skipped entirely (`ALREADY_EXISTS`), so once a document
|
||||
has a file the importer never touches it again — this bounds the authoritative-overwrite
|
||||
blast radius to placeholder rows.
|
||||
Verified against real Postgres in `CanonicalImportIntegrationTest`
|
||||
(`reimport_preservesHumanEditedPersonField`, `reimport_prunesRemovedReceiverAndTag…`,
|
||||
`import_neverFlipsRegisterPersonToProvisional…`).
|
||||
- **Name policy = Option A.** The normalizer resolved attribution upstream: the document sheet
|
||||
carries the resolved slug in `sender_person_id` / `receiver_person_ids` and the raw cell in
|
||||
`sender_name` / `receiver_names`. The importer routes register-first by `source_ref`
|
||||
@@ -114,6 +129,15 @@ Settled sub-decisions:
|
||||
- **Forward-only.** The migration is immutable once shipped (Flyway checksum model); any fix
|
||||
goes in a later version. There is no down-migration — rollback means restoring from the
|
||||
nightly `pg_dump`, the standard procedure.
|
||||
- **`runImport()` is non-transactional — per-loader transactions only.** The orchestrator
|
||||
does not wrap the four loaders in a single transaction; each loader (or the per-call
|
||||
`upsertBySourceRef` / `DocumentImporter.load`) carries its own `@Transactional` boundary. A
|
||||
partial failure mid-run (e.g. the document loader throws after tags + persons committed)
|
||||
leaves the earlier loaders' data committed and the `ImportStatus` set to `FAILED`. This is
|
||||
acceptable precisely because the import is idempotent: re-running is safe and converges to
|
||||
the same state, so the operational recovery for a partial failure is simply to fix the
|
||||
offending artifact and re-trigger the import — no manual cleanup of half-written data is
|
||||
required. A future maintainer must not assume all-or-nothing semantics.
|
||||
- **`PersonSummaryDTO` coupling.** `provisional` was added to the `PersonSummaryDTO` native
|
||||
interface projection; because the projection is backed by native SQL, the column had to be
|
||||
added to all three native `SELECT`s (`findAllWithDocumentCount`, `searchWithDocumentCount`,
|
||||
|
||||
Reference in New Issue
Block a user