diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DatePrecision.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DatePrecision.java new file mode 100644 index 00000000..e67f17e1 --- /dev/null +++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DatePrecision.java @@ -0,0 +1,17 @@ +package org.raddatz.familienarchiv.document; + +/** + * Precision of a document's date. Verbatim mirror of the import normalizer's + * {@code Precision} enum (tools/import-normalizer/dates.py) — the canonical output is the + * contract, so there is no translation layer. Do not add, remove, or rename values without + * also changing the normalizer; a mismatch silently breaks import idempotency (see ADR-025). + */ +public enum DatePrecision { + DAY, + MONTH, + SEASON, + YEAR, + RANGE, + APPROX, + UNKNOWN +} diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/Document.java b/backend/src/main/java/org/raddatz/familienarchiv/document/Document.java index 71c6dead..7f702763 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/document/Document.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/document/Document.java @@ -91,6 +91,29 @@ public class Document { @Column(name = "meta_date") private LocalDate documentDate; // Wann wurde der Brief geschrieben? + // Precision of documentDate — drives honest rendering ("ca. 1943", "Frühjahr 1943"). + // Verbatim mirror of the normalizer's Precision enum (see ADR-025). + @Enumerated(EnumType.STRING) + @Column(name = "meta_date_precision", nullable = false, length = 16) + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + @Builder.Default + private DatePrecision metaDatePrecision = DatePrecision.UNKNOWN; + + // Range end — only set when metaDatePrecision is RANGE (open-ended ranges allowed → may be null). + @Column(name = "meta_date_end") + private LocalDate metaDateEnd; + + // Original date cell, verbatim, preserved for provenance and "as written" display. 
+ @Column(name = "meta_date_raw", columnDefinition = "TEXT") + private String metaDateRaw; + + // Raw attribution preserved even when a person is linked via sender/receivers. + @Column(name = "sender_text", columnDefinition = "TEXT") + private String senderText; + + @Column(name = "receiver_text", columnDefinition = "TEXT") + private String receiverText; + @Column(name = "meta_location") private String location; diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentBatchMetadataDTO.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentBatchMetadataDTO.java index e9e47270..56553692 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentBatchMetadataDTO.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentBatchMetadataDTO.java @@ -12,6 +12,8 @@ public class DocumentBatchMetadataDTO { private UUID senderId; private List receiverIds; private LocalDate documentDate; + private DatePrecision metaDatePrecision; + private LocalDate metaDateEnd; private String location; private List tagNames; private Boolean metadataComplete; diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentListItem.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentListItem.java index 7cbc6496..be6b3d40 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentListItem.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentListItem.java @@ -18,6 +18,9 @@ public record DocumentListItem( String originalFilename, String thumbnailUrl, LocalDate documentDate, + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + DatePrecision metaDatePrecision, + LocalDate metaDateEnd, Person sender, @Schema(requiredMode = Schema.RequiredMode.REQUIRED) List receivers, diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java 
index cfbbf848..edeedee6 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java @@ -758,6 +758,8 @@ public class DocumentService { doc.getOriginalFilename(), doc.getThumbnailUrl(), doc.getDocumentDate(), + doc.getMetaDatePrecision(), + doc.getMetaDateEnd(), doc.getSender(), List.copyOf(doc.getReceivers()), List.copyOf(doc.getTags()), diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentUpdateDTO.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentUpdateDTO.java index 3bfda02c..118113e3 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentUpdateDTO.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentUpdateDTO.java @@ -11,6 +11,11 @@ import org.raddatz.familienarchiv.ocr.ScriptType; public class DocumentUpdateDTO { private String title; private LocalDate documentDate; + private DatePrecision metaDatePrecision; + private LocalDate metaDateEnd; + private String metaDateRaw; + private String senderText; + private String receiverText; private String location; private String documentLocation; private String archiveBox; diff --git a/backend/src/main/java/org/raddatz/familienarchiv/person/Person.java b/backend/src/main/java/org/raddatz/familienarchiv/person/Person.java index d2332519..993480c4 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/person/Person.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/person/Person.java @@ -57,6 +57,18 @@ public class Person { @Schema(requiredMode = Schema.RequiredMode.REQUIRED) private boolean familyMember = false; + // The normalizer person_id — join key and re-import idempotency key. Null for manually + // created persons; unique among non-null values (see ADR-025). 
+ @Column(name = "source_ref") + private String sourceRef; + + // A provisional person is one the importer inferred but could not confidently identify. + // Distinct from familyMember (a genealogical fact); set true only by the importer (Phase 3). + @Column(name = "provisional", nullable = false) + @Builder.Default + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + private boolean provisional = false; + // Entity-graph navigation for JPA JOIN queries (e.g. DocumentSpecifications.hasText). // Uses entity relationship rather than cross-domain repository access, avoiding a // separate DB roundtrip while respecting domain boundaries. diff --git a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonRepository.java b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonRepository.java index 6f431b74..1beebcc3 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonRepository.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonRepository.java @@ -41,7 +41,7 @@ public interface PersonRepository extends JpaRepository { SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName, p.person_type AS personType, p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes, - p.family_member AS familyMember, + p.family_member AS familyMember, p.provisional AS provisional, (SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id) + (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount FROM persons p @@ -54,7 +54,7 @@ public interface PersonRepository extends JpaRepository { SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName, p.person_type AS personType, p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes, - p.family_member AS familyMember, + p.family_member AS familyMember, p.provisional AS provisional, (SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id) + (SELECT COUNT(*) FROM document_receivers dr WHERE 
dr.person_id = p.id) AS documentCount FROM persons p @@ -63,7 +63,7 @@ public interface PersonRepository extends JpaRepository { OR LOWER(CONCAT(p.last_name,' ',COALESCE(p.first_name,''))) LIKE LOWER(CONCAT('%',:query,'%')) OR LOWER(p.alias) LIKE LOWER(CONCAT('%',:query,'%')) OR LOWER(a.last_name) LIKE LOWER(CONCAT('%',:query,'%')) - GROUP BY p.id, p.title, p.first_name, p.last_name, p.person_type, p.alias, p.birth_year, p.death_year, p.notes, p.family_member + GROUP BY p.id, p.title, p.first_name, p.last_name, p.person_type, p.alias, p.birth_year, p.death_year, p.notes, p.family_member, p.provisional ORDER BY p.last_name ASC, p.first_name ASC """, nativeQuery = true) @@ -75,7 +75,7 @@ public interface PersonRepository extends JpaRepository { SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName, p.person_type AS personType, p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes, - p.family_member AS familyMember, + p.family_member AS familyMember, p.provisional AS provisional, (SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id) + (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount FROM persons p diff --git a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonSummaryDTO.java b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonSummaryDTO.java index 68cbbe1b..9a92d257 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/person/PersonSummaryDTO.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/person/PersonSummaryDTO.java @@ -18,6 +18,7 @@ public interface PersonSummaryDTO { Integer getDeathYear(); String getNotes(); boolean isFamilyMember(); + boolean isProvisional(); long getDocumentCount(); default String getDisplayName() { diff --git a/backend/src/main/java/org/raddatz/familienarchiv/tag/Tag.java b/backend/src/main/java/org/raddatz/familienarchiv/tag/Tag.java index fc5974a6..32585eed 100644 --- 
a/backend/src/main/java/org/raddatz/familienarchiv/tag/Tag.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/tag/Tag.java @@ -30,4 +30,11 @@ public class Tag { /** Color token name (e.g. "sage"), only set on root-level tags. Null means no color. */ private String color; + + /** + * Import identity key, keyed on the canonical tag_path. Null for manually created tags; + * unique among non-null values. The importer (Phase 3) uses it for idempotent re-import. + */ + @Column(name = "source_ref") + private String sourceRef; } diff --git a/backend/src/main/resources/db/migration/V69__import_precision_attribution_identity_schema.sql b/backend/src/main/resources/db/migration/V69__import_precision_attribution_identity_schema.sql new file mode 100644 index 00000000..bec01873 --- /dev/null +++ b/backend/src/main/resources/db/migration/V69__import_precision_attribution_identity_schema.sql @@ -0,0 +1,67 @@ +-- Phase 2 of "Handling the Unknowns": the schema foundation. +-- Consolidates every new import/precision/attribution/identity column into ONE +-- migration with a single owner so downstream phases (importer, rendering, persons +-- directory) compile against a finished, collision-free schema. See ADR-025. +-- +-- This file is forward-only and immutable once shipped (Flyway checksum model): +-- any fix goes in a later version, never an edit here. + +-- ─── documents: date precision, range end, raw date, raw attribution ────────── + +-- Range end is only set for RANGE precision (open-ended ranges allowed → end may be null). +ALTER TABLE documents ADD COLUMN meta_date_end date; + +-- Original date cell, verbatim, for provenance and "as written" display (Phase 4). +ALTER TABLE documents ADD COLUMN meta_date_raw text; + +-- Raw attribution preserved even when a person is linked. 
+ALTER TABLE documents ADD COLUMN sender_text text; +ALTER TABLE documents ADD COLUMN receiver_text text; + +-- Bound user-influenced spreadsheet text at the DB layer (mirrors transcription_blocks +-- length cap in V18). Defense in depth against malformed/huge import cells. +ALTER TABLE documents ADD CONSTRAINT chk_meta_date_raw_length CHECK (length(meta_date_raw) <= 10000); +ALTER TABLE documents ADD CONSTRAINT chk_sender_text_length CHECK (length(sender_text) <= 10000); +ALTER TABLE documents ADD CONSTRAINT chk_receiver_text_length CHECK (length(receiver_text) <= 10000); + +-- Precision enum — added with a DB default of 'UNKNOWN', backfilled, then made NOT NULL. +-- The DEFAULT serves two purposes: (1) existing rows get 'UNKNOWN' immediately, and +-- (2) raw-SQL inserts that omit the column (test fixtures, ad-hoc data loads) get a sane, +-- CHECK-valid value instead of violating the NOT NULL constraint. JPA saves still set it +-- explicitly via the entity's @Builder.Default = DatePrecision.UNKNOWN. +ALTER TABLE documents ADD COLUMN meta_date_precision varchar(16) DEFAULT 'UNKNOWN'; + +UPDATE documents +SET meta_date_precision = CASE WHEN meta_date IS NOT NULL THEN 'DAY' ELSE 'UNKNOWN' END; + +ALTER TABLE documents ALTER COLUMN meta_date_precision SET NOT NULL; + +-- Fail-closed allowlist of the seven precision values (verbatim mirror of the +-- normalizer's Precision enum). The DB enforces validity independent of the Java enum. +ALTER TABLE documents ADD CONSTRAINT chk_meta_date_precision + CHECK (meta_date_precision IN ('DAY', 'MONTH', 'SEASON', 'YEAR', 'RANGE', 'APPROX', 'UNKNOWN')); + +-- A non-null range end is permitted only when precision = RANGE. A RANGE row MAY have a +-- null end (open-ended range), so the rule is one-directional, not biconditional. 
+ALTER TABLE documents ADD CONSTRAINT chk_meta_date_end_only_for_range + CHECK (meta_date_end IS NULL OR meta_date_precision = 'RANGE'); + +-- For ranges with both endpoints, the end must not precede the start. +ALTER TABLE documents ADD CONSTRAINT chk_meta_date_end_after_start + CHECK (meta_date_end IS NULL OR meta_date IS NULL OR meta_date_end >= meta_date); + +-- ─── persons: source_ref (import identity) + provisional flag ───────────────── + +-- The normalizer person_id: join key for documents → persons and idempotency key for +-- re-import. Nullable (manually created persons never have one); unique among non-nulls. +ALTER TABLE persons ADD COLUMN source_ref varchar(255); +CREATE UNIQUE INDEX idx_persons_source_ref ON persons (source_ref); + +-- A provisional person is one the importer inferred but could not confidently identify. +-- Stays false until Phase 3 (importer) sets it; no code path writes true in this phase. +ALTER TABLE persons ADD COLUMN provisional boolean NOT NULL DEFAULT false; + +-- ─── tag: source_ref (import identity, keyed on canonical tag_path) ─────────── + +ALTER TABLE tag ADD COLUMN source_ref varchar(255); +CREATE UNIQUE INDEX idx_tag_source_ref ON tag (source_ref); diff --git a/backend/src/test/java/org/raddatz/familienarchiv/MigrationIntegrationTest.java b/backend/src/test/java/org/raddatz/familienarchiv/MigrationIntegrationTest.java index 425d0f59..ce217e6f 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/MigrationIntegrationTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/MigrationIntegrationTest.java @@ -479,6 +479,191 @@ class MigrationIntegrationTest { assertThat(count).isEqualTo(1); } + // ─── V69: import/precision/attribution/identity schema foundation ──────── + + @Test + void v69_metaDatePrecisionColumn_isNotNull() { + Integer count = jdbc.queryForObject( + """ + SELECT COUNT(*) FROM information_schema.columns + WHERE table_schema = 'public' + AND table_name = 'documents' + AND column_name = 
'meta_date_precision' + AND is_nullable = 'NO' + """, + Integer.class); + assertThat(count).isEqualTo(1); + } + + @Test + void v69_backfillSql_setsDatedRowsToDayPrecision() { + // Re-run the migration's backfill UPDATE on a freshly dated row to prove the rule. + UUID docId = createDocumentWithDate("1943-05-12"); + + jdbc.update(V69_BACKFILL_PRECISION_SQL); + + String precision = jdbc.queryForObject( + "SELECT meta_date_precision FROM documents WHERE id = ?", String.class, docId); + assertThat(precision).isEqualTo("DAY"); + } + + @Test + void v69_backfillSql_setsUndatedRowsToUnknownPrecision() { + UUID docId = createDocument(); // no meta_date + + jdbc.update(V69_BACKFILL_PRECISION_SQL); + + String precision = jdbc.queryForObject( + "SELECT meta_date_precision FROM documents WHERE id = ?", String.class, docId); + assertThat(precision).isEqualTo("UNKNOWN"); + } + + // Mirrors the backfill UPDATE shipped in V69; idempotent for verification. + private static final String V69_BACKFILL_PRECISION_SQL = """ + UPDATE documents + SET meta_date_precision = CASE WHEN meta_date IS NOT NULL THEN 'DAY' ELSE 'UNKNOWN' END + """; + + @Test + void v69_precisionCheck_rejectsValueOutsideEnum() { + UUID docId = createDocument(); + + assertThatThrownBy(() -> + jdbc.update("UPDATE documents SET meta_date_precision = 'BOGUS' WHERE id = ?", docId) + ).isInstanceOf(DataIntegrityViolationException.class); + } + + @Test + void v69_metaDateEndCheck_rejectsNonNullEndWhenPrecisionNotRange() { + UUID docId = createDocumentWithDate("1943-05-12"); // precision is not RANGE: the helper only sets meta_date, so precision stays at its default (presumably UNKNOWN), not DAY + + assertThatThrownBy(() -> + jdbc.update("UPDATE documents SET meta_date_end = '1943-06-01' WHERE id = ?", docId) + ).isInstanceOf(DataIntegrityViolationException.class); + } + + @Test + void v69_metaDateEndCheck_allowsNonNullEndWhenPrecisionRange() { + UUID docId = createDocumentWithDate("1943-05-12"); + + int rows = jdbc.update( + "UPDATE documents SET meta_date_precision = 'RANGE', meta_date_end = '1943-06-01' WHERE id = ?", + 
docId); + assertThat(rows).isEqualTo(1); + } + + @Test + void v69_metaDateEndCheck_allowsRangeWithNullEnd() { + // Loose semantics: the normalizer may emit an open-ended RANGE (start only). + UUID docId = createDocumentWithDate("1943-05-12"); + + int rows = jdbc.update( + "UPDATE documents SET meta_date_precision = 'RANGE' WHERE id = ?", docId); + assertThat(rows).isEqualTo(1); + } + + @Test + void v69_metaDateEndCheck_allowsRangeWithBothEndpointsNull() { + // Fully-open RANGE: neither start (meta_date) nor end (meta_date_end) is set. + // Both CHECKs hold (end IS NULL passes chk_meta_date_end_only_for_range; both-null + // passes chk_meta_date_end_after_start), so the row survives. This locks the actual + // DB behavior so a future tightening to a biconditional rule is a deliberate change. + UUID docId = createDocument(); // null meta_date + + int rows = jdbc.update( + "UPDATE documents SET meta_date_precision = 'RANGE' WHERE id = ?", docId); + assertThat(rows).isEqualTo(1); + + Object metaDate = jdbc.queryForObject("SELECT meta_date FROM documents WHERE id = ?", Object.class, docId); + Object metaDateEnd = jdbc.queryForObject( + "SELECT meta_date_end FROM documents WHERE id = ?", Object.class, docId); + assertThat(metaDate).isNull(); + assertThat(metaDateEnd).isNull(); + } + + @Test + void v69_rangeOrderCheck_rejectsEndBeforeStart() { + UUID docId = createDocumentWithDate("1943-05-12"); + + assertThatThrownBy(() -> + jdbc.update( + "UPDATE documents SET meta_date_precision = 'RANGE', meta_date_end = '1943-01-01' WHERE id = ?", + docId) + ).isInstanceOf(DataIntegrityViolationException.class); + } + + @Test + void v69_metaDateRawCheck_rejectsOverlongText() { + UUID docId = createDocument(); + String tooLong = "x".repeat(10001); + + assertThatThrownBy(() -> + jdbc.update("UPDATE documents SET meta_date_raw = ? 
WHERE id = ?", tooLong, docId) + ).isInstanceOf(DataIntegrityViolationException.class); + } + + @Test + void v69_senderTextAndReceiverText_storeRawAttribution() { + UUID docId = createDocument(); + + int rows = jdbc.update( + "UPDATE documents SET sender_text = 'Oma Anna', receiver_text = 'Tante Grete' WHERE id = ?", + docId); + assertThat(rows).isEqualTo(1); + } + + @Test + @Transactional(propagation = Propagation.NOT_SUPPORTED) + void v69_personsSourceRef_uniqueIndexRejectsDuplicate() { + jdbc.update( + "INSERT INTO persons (id, last_name, source_ref) VALUES (gen_random_uuid(), 'A', 'person:dup')"); + try { + assertThatThrownBy(() -> + jdbc.update( + "INSERT INTO persons (id, last_name, source_ref) VALUES (gen_random_uuid(), 'B', 'person:dup')") + ).isInstanceOf(DataIntegrityViolationException.class); + } finally { + jdbc.update("DELETE FROM persons WHERE source_ref = 'person:dup'"); + } + } + + @Test + @Transactional(propagation = Propagation.NOT_SUPPORTED) + void v69_personsSourceRef_allowsMultipleNulls() { + UUID a = createPerson("Null", "RefA"); + UUID b = createPerson("Null", "RefB"); + try { + String refA = jdbc.queryForObject("SELECT source_ref FROM persons WHERE id = ?", String.class, a); + String refB = jdbc.queryForObject("SELECT source_ref FROM persons WHERE id = ?", String.class, b); + assertThat(refA).isNull(); + assertThat(refB).isNull(); + } finally { + jdbc.update("DELETE FROM persons WHERE id IN (?, ?)", a, b); + } + } + + @Test + void v69_personsProvisional_defaultsToFalse() { + UUID id = createPerson("Provisional", "Default"); + + Boolean provisional = jdbc.queryForObject( + "SELECT provisional FROM persons WHERE id = ?", Boolean.class, id); + assertThat(provisional).isFalse(); + } + + @Test + @Transactional(propagation = Propagation.NOT_SUPPORTED) + void v69_tagSourceRef_uniqueIndexRejectsDuplicate() { + jdbc.update("INSERT INTO tag (id, name, source_ref) VALUES (gen_random_uuid(), 'TagDupA', 'tag:dup')"); + try { + assertThatThrownBy(() -> + 
jdbc.update("INSERT INTO tag (id, name, source_ref) VALUES (gen_random_uuid(), 'TagDupB', 'tag:dup')") + ).isInstanceOf(DataIntegrityViolationException.class); + } finally { + jdbc.update("DELETE FROM tag WHERE source_ref = 'tag:dup'"); + } + } + // ─── helpers ───────────────────────────────────────────────────────────── private UUID createPerson(String firstName, String lastName) { @@ -504,6 +689,12 @@ class MigrationIntegrationTest { return doc.getId(); } + private UUID createDocumentWithDate(String isoDate) { + UUID id = createDocument(); + jdbc.update("UPDATE documents SET meta_date = ?::date WHERE id = ?", isoDate, id); + return id; + } + private UUID insertAnnotation(UUID docId) { UUID id = UUID.randomUUID(); jdbc.update(""" diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentControllerTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentControllerTest.java index f7c24541..d2f91d91 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentControllerTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentControllerTest.java @@ -133,7 +133,8 @@ class DocumentControllerTest { "Er schrieb einen langen Brief", List.of(), false, List.of(), List.of(), List.of(), null, List.of()); when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any())) .thenReturn(DocumentSearchResult.of(List.of(new DocumentListItem( - docId, "Brief an Anna", "brief.pdf", null, null, null, + docId, "Brief an Anna", "brief.pdf", null, null, + DatePrecision.UNKNOWN, null, null, List.of(), List.of(), null, null, null, null, 0, List.of(), matchData)))); @@ -151,7 +152,8 @@ class DocumentControllerTest { var matchData = new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of(), null, List.of()); when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any())) 
.thenReturn(DocumentSearchResult.of(List.of(new DocumentListItem( - docId, "Brief an Anna", "brief.pdf", null, null, null, + docId, "Brief an Anna", "brief.pdf", null, null, + DatePrecision.UNKNOWN, null, null, List.of(), List.of(), null, null, null, null, 0, List.of(), matchData)))); diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentListItemIntegrationTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentListItemIntegrationTest.java index 4c532882..d97aaf9c 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentListItemIntegrationTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentListItemIntegrationTest.java @@ -81,6 +81,28 @@ class DocumentListItemIntegrationTest { assertThat(item.title()).isEqualTo("Kurrent Brief"); } + @Test + void search_listItem_carriesMetaDatePrecisionAndEnd() { + documentRepository.save(Document.builder() + .title("Range Brief") + .originalFilename("range.pdf") + .status(DocumentStatus.UPLOADED) + .documentDate(java.time.LocalDate.of(1943, 1, 1)) + .metaDatePrecision(DatePrecision.RANGE) + .metaDateEnd(java.time.LocalDate.of(1943, 12, 31)) + .build()); + + DocumentSearchResult result = documentService.searchDocuments( + null, null, null, null, null, null, null, null, + DocumentSort.DATE, "DESC", null, + PageRequest.of(0, 50)); + + DocumentListItem item = result.items().stream() + .filter(i -> i.title().equals("Range Brief")).findFirst().orElseThrow(); + assertThat(item.metaDatePrecision()).isEqualTo(DatePrecision.RANGE); + assertThat(item.metaDateEnd()).isEqualTo(java.time.LocalDate.of(1943, 12, 31)); + } + @Test void detail_stillReturnsTrainingLabels() { Document saved = documentRepository.save(Document.builder() diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchResultTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchResultTest.java index 1dd09fed..ca4c77f5 100644 
--- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchResultTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchResultTest.java @@ -14,7 +14,8 @@ class DocumentSearchResultTest { private DocumentListItem item(UUID docId) { return new DocumentListItem( - docId, "Test", "test.pdf", null, null, null, + docId, "Test", "test.pdf", null, null, + DatePrecision.UNKNOWN, null, null, List.of(), List.of(), null, null, null, null, 0, List.of(), SearchMatchData.empty()); } @@ -64,7 +65,8 @@ class DocumentSearchResultTest { UUID id = UUID.randomUUID(); ActivityActorDTO actor = new ActivityActorDTO("AB", "#f00", "Anna Braun"); DocumentListItem item = new DocumentListItem( - id, "T", "t.pdf", null, null, null, + id, "T", "t.pdf", null, null, + DatePrecision.UNKNOWN, null, null, List.of(), List.of(), null, null, null, null, 75, List.of(actor), SearchMatchData.empty()); diff --git a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonControllerTest.java b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonControllerTest.java index e7767411..2dee3baa 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonControllerTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonControllerTest.java @@ -117,6 +117,7 @@ class PersonControllerTest { public Integer getDeathYear() { return null; } public String getNotes() { return null; } public boolean isFamilyMember() { return false; } + public boolean isProvisional() { return false; } public long getDocumentCount() { return 0; } }; } diff --git a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonRepositoryTest.java b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonRepositoryTest.java index 8ccf27ba..2de9f69f 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/person/PersonRepositoryTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/person/PersonRepositoryTest.java @@ -463,4 
+463,46 @@ class PersonRepositoryTest { assertThat(result).hasSize(1); assertThat(result.get(0).getLastName()).isEqualTo("Gesellschafter des Verlages"); } + + // ─── #671: provisional must be SELECTed in all three native projections ─── + // Adding isProvisional() to the interface compiles even if a native query forgets + // to SELECT p.provisional — it then silently returns false. These tests are the only + // guard against that trap, so they must run against real Postgres. + + @Test + void findAllWithDocumentCount_projectsProvisionalTrue() { + personRepository.save(Person.builder() + .firstName("Inferred").lastName("Person").provisional(true).build()); + + List result = personRepository.findAllWithDocumentCount(); + + assertThat(result).anyMatch(PersonSummaryDTO::isProvisional); + } + + @Test + void searchWithDocumentCount_projectsProvisionalTrue() { + personRepository.save(Person.builder() + .firstName("Provisorisch").lastName("Müller").provisional(true).build()); + + List result = personRepository.searchWithDocumentCount("Provisorisch"); + + assertThat(result).hasSize(1); + assertThat(result.get(0).isProvisional()).isTrue(); + } + + @Test + void findTopByDocumentCount_projectsProvisionalTrue() { + Person provisional = personRepository.save(Person.builder() + .firstName("Top").lastName("Provisional").provisional(true).build()); + documentRepository.save(Document.builder() + .title("Brief").originalFilename("b.pdf") + .status(DocumentStatus.UPLOADED) + .sender(provisional).build()); + + List result = personRepository.findTopByDocumentCount(10); + + PersonSummaryDTO summary = result.stream() + .filter(p -> p.getId().equals(provisional.getId())).findFirst().orElseThrow(); + assertThat(summary.isProvisional()).isTrue(); + } } diff --git a/docs/GLOSSARY.md b/docs/GLOSSARY.md index 99da1775..1fefb7af 100644 --- a/docs/GLOSSARY.md +++ b/docs/GLOSSARY.md @@ -25,6 +25,11 @@ _Not to be confused with [AppUser](#appuser-appuser)_ — `Person` is a historic **UserGroup** 
(`UserGroup`) — a named permission bundle assigned to one or more `AppUser`s. A user's effective permissions are the union of all permissions across all groups they belong to. +**source_ref** (`Person.sourceRef`, `Tag.sourceRef`) — the import normalizer's stable identity for a `Person` (its `person_id`) or `Tag` (its canonical `tag_path`). It is the join key linking normalized records to documents and the idempotency key for re-import; null for manually created records and unique among non-null values. + +**provisional person** (`Person.provisional`) — a `Person` the importer inferred from raw attribution text but could not confidently match to a known individual. The flag lets the persons directory surface uncertainty honestly rather than fabricate a confident identity; it defaults to `false` and is set `true` only by the importer. +_Not to be confused with `family_member`_ — `provisional` expresses import confidence, while `family_member` is a genealogical fact about whether the person belongs to the family tree. + --- ## Document-Related Terms @@ -36,6 +41,10 @@ _See also [TranscriptionBlock](#transcriptionblock-transcriptionblock)._ **Document** (`Document`) — a single archival item (letter, postcard, photograph) with a file stored in MinIO/S3 and associated metadata (sender, receivers, date, tags, transcription blocks). +**date precision** (`Document.metaDatePrecision`, enum `DatePrecision`) — how exactly a document's date is known, one of `DAY, MONTH, SEASON, YEAR, RANGE, APPROX, UNKNOWN`. A verbatim mirror of the import normalizer's `Precision` enum so honest dates can be rendered (`APPROX` → "ca.", `RANGE` uses `meta_date_end`) instead of fabricating a false `DAY`-level date. `UNKNOWN` is the explicit value for undated documents. 
+ +**raw attribution** (`Document.senderText`, `Document.receiverText`, `Document.metaDateRaw`) — the original spreadsheet cell text for a document's sender, receiver, and date, preserved verbatim even after a `Person` or normalized date is linked. It keeps provenance intact and enables an "as written in the original" view. + **DocumentVersion** (`DocumentVersion`) — an append-only snapshot of a `Document`'s metadata at a point in time. Append-only by convention; no consumer-facing create or update endpoint exists. The entity uses Lombok `@Data` (which generates setters), so immutability is enforced by application convention, not at the Java level. **Tag** (`Tag`) — a hierarchical category that can be applied to `Document`s. Tags are self-referencing via a `parent_id` foreign key, forming a tree structure. diff --git a/docs/adr/025-canonical-import-and-single-migration-schema-foundation.md b/docs/adr/025-canonical-import-and-single-migration-schema-foundation.md new file mode 100644 index 00000000..0feb670b --- /dev/null +++ b/docs/adr/025-canonical-import-and-single-migration-schema-foundation.md @@ -0,0 +1,83 @@ +# ADR-025 — Canonical Import Output as Contract & Single-Migration Schema Foundation + +**Date:** 2026-05-27 +**Status:** Accepted +**Issue:** #671 +**Milestone:** Handling the Unknowns — honest uncertainty in dates & people + +--- + +## Context + +The "Handling the Unknowns" milestone introduces honest uncertainty into the archive: +documents whose dates are known only approximately or as a range, and people the importer +infers from raw attribution text but cannot confidently identify. Three sibling issues — +date precision (#666), name triage (#665), and the importer (#669) — each independently +planned a Flyway `V69` migration that altered `persons`. Three `V69`s is a boot failure +(Flyway versions must be unique), and `persons.provisional` was at risk of being defined +twice. 
+ +Two durable decisions had to be made before any application code in Phases 3–6 could +compile against the new schema. + +--- + +## Decision + +### 1. All import/precision/attribution/identity schema lives in ONE migration with a single owner + +`V69__import_precision_attribution_identity_schema.sql` adds every new column for this +milestone in a single, atomic, forward-only migration: + +- `documents`: `meta_date_precision` (backfilled `DAY` where dated / `UNKNOWN` where not, + then `NOT NULL`), `meta_date_end`, `meta_date_raw`, `sender_text`, `receiver_text`. +- `persons`: `source_ref` (unique index, nullable), `provisional` (`NOT NULL DEFAULT false`). +- `tag`: `source_ref` (unique index, nullable). + +Integrity is pushed to the database as fail-closed `CHECK` constraints (the precedent is +`V22`'s `person_type` allowlist): + +- `meta_date_precision` must be one of the seven enum values. +- `meta_date_end` may be non-null **only** when precision = `RANGE` (one-directional, not + biconditional — see Consequences). +- `meta_date_end >= meta_date` for ranges with both endpoints (a `CHECK`, not a trigger). +- `meta_date_raw`, `sender_text`, `receiver_text` are length-capped at 10 000 (mirrors the + `transcription_blocks` cap in `V18`). + +No sibling issue adds another migration that alters `persons` or `documents` in this +milestone. + +### 2. The backend `DatePrecision` enum is a verbatim mirror of the normalizer's `Precision`; the canonical output is the contract + +The importer reads the Python normalizer's canonical output +(`tools/import-normalizer/`). The backend `DatePrecision` enum +(`DAY, MONTH, SEASON, YEAR, RANGE, APPROX, UNKNOWN`) is a verbatim copy of the normalizer's +`Precision(StrEnum)` (`dates.py`). There is **no translation layer**: the normalizer's +output strings are persisted as-is. The same applies to `source_ref`, which carries the +normalizer's `person_id` / canonical `tag_path` unchanged as the re-import idempotency key. 
+ +--- + +## Consequences + +- **RANGE is one-directional, not biconditional.** A `RANGE` row may have a null + `meta_date_end` (an open-ended range with only a start), because the normalizer can emit + start-only ranges. A biconditional `RANGE ⟺ end IS NOT NULL` rule would reject valid + normalizer output, so it was ruled out. Phase 4 rendering must handle a `RANGE` with no end + gracefully. +- **`provisional` stays `false` throughout this phase.** The column and flag exist, but no + code path sets it `true` yet; the importer (Phase 3) will be the only writer. This is intentional, + not a half-built feature. +- **A future dev must not "improve" the enum.** Renaming or dropping a `DatePrecision` value + without changing the normalizer silently breaks import idempotency and date rendering. The + enum's Javadoc states this; the DB `CHECK` enforces validity independent of the Java enum. +- **`source_ref` is unique + nullable.** Manually created persons/tags have `source_ref = + NULL`; Postgres allows multiple NULLs under a plain unique index, so no backfill is needed. +- **Forward-only.** The migration is immutable once shipped (Flyway checksum model); any fix + goes in a later version. There is no down-migration — rollback means restoring from the + nightly `pg_dump`, the standard procedure. +- **`PersonSummaryDTO` coupling.** `provisional` was added to the `PersonSummaryDTO` native + interface projection; because the projection is backed by native SQL, the column had to be + added to all three native `SELECT`s (`findAllWithDocumentCount`, `searchWithDocumentCount`, + `findTopByDocumentCount`) or it would silently return `false`. Guarded by integration tests + against real Postgres. diff --git a/docs/architecture/db/db-orm.puml b/docs/architecture/db/db-orm.puml index a6e64aa3..7b03c156 100644 --- a/docs/architecture/db/db-orm.puml +++ b/docs/architecture/db/db-orm.puml @@ -1,6 +1,6 @@ @startuml db-orm -' Schema source: Flyway V1–V60 (excl. 
V37, V43 — intentionally removed) -' Schema as of: V60 (2026-05-06) +' Schema source: Flyway V1–V69 (excl. V37, V43 — intentionally removed) +' Schema as of: V69 (2026-05-27) ' ⚠ This is a versioned snapshot. Update when the schema changes significantly. hide circle @@ -88,6 +88,11 @@ package "Documents" { summary : TEXT transcription : TEXT meta_date : DATE + meta_date_precision : VARCHAR(16) NOT NULL + meta_date_end : DATE + meta_date_raw : TEXT + sender_text : TEXT + receiver_text : TEXT meta_location : VARCHAR(255) meta_document_location : VARCHAR(255) archive_box : VARCHAR(255) @@ -182,6 +187,8 @@ package "Persons" { birth_year : INTEGER death_year : INTEGER family_member : BOOLEAN NOT NULL + source_ref : VARCHAR(255) UNIQUE + provisional : BOOLEAN NOT NULL } entity person_name_aliases { @@ -217,6 +224,7 @@ package "Tags" { name : VARCHAR(255) NOT NULL UNIQUE parent_id : UUID <> color : VARCHAR(20) + source_ref : VARCHAR(255) UNIQUE } } diff --git a/docs/architecture/db/db-relationships.puml b/docs/architecture/db/db-relationships.puml index c3100cfa..d6f4b542 100644 --- a/docs/architecture/db/db-relationships.puml +++ b/docs/architecture/db/db-relationships.puml @@ -1,7 +1,9 @@ @startuml db-relationships -' Schema source: Flyway V1–V60 (excl. V37, V43 — intentionally removed) -' Schema as of: V60 (2026-05-06) +' Schema source: Flyway V1–V69 (excl. V37, V43 — intentionally removed) +' Schema as of: V69 (2026-05-27) ' ⚠ This is a versioned snapshot. Update when the schema changes significantly. +' Note: V69 adds columns only (persons.source_ref, persons.provisional, tag.source_ref, document +' precision/attribution fields); no new FK relationships, so this diagram is unchanged. 
hide circle skinparam linetype ortho diff --git a/frontend/src/lib/generated/api.ts b/frontend/src/lib/generated/api.ts index 9a9a5408..5ef387b4 100644 --- a/frontend/src/lib/generated/api.ts +++ b/frontend/src/lib/generated/api.ts @@ -1636,6 +1636,7 @@ export interface components { /** Format: uuid */ parentId?: string; color?: string; + sourceRef?: string; }; PersonUpdateDTO: { /** @enum {string} */ @@ -1665,12 +1666,21 @@ export interface components { /** Format: int32 */ deathYear?: number; familyMember: boolean; + sourceRef?: string; + provisional: boolean; readonly displayName: string; }; DocumentUpdateDTO: { title?: string; /** Format: date */ documentDate?: string; + /** @enum {string} */ + metaDatePrecision?: "DAY" | "MONTH" | "SEASON" | "YEAR" | "RANGE" | "APPROX" | "UNKNOWN"; + /** Format: date */ + metaDateEnd?: string; + metaDateRaw?: string; + senderText?: string; + receiverText?: string; location?: string; documentLocation?: string; archiveBox?: string; @@ -1704,6 +1714,13 @@ export interface components { status: "PLACEHOLDER" | "UPLOADED" | "TRANSCRIBED" | "REVIEWED" | "ARCHIVED"; /** Format: date */ documentDate?: string; + /** @enum {string} */ + metaDatePrecision: "DAY" | "MONTH" | "SEASON" | "YEAR" | "RANGE" | "APPROX" | "UNKNOWN"; + /** Format: date */ + metaDateEnd?: string; + metaDateRaw?: string; + senderText?: string; + receiverText?: string; location?: string; documentLocation?: string; archiveBox?: string; @@ -2024,6 +2041,10 @@ export interface components { receiverIds?: string[]; /** Format: date */ documentDate?: string; + /** @enum {string} */ + metaDatePrecision?: "DAY" | "MONTH" | "SEASON" | "YEAR" | "RANGE" | "APPROX" | "UNKNOWN"; + /** Format: date */ + metaDateEnd?: string; location?: string; tagNames?: string[]; metadataComplete?: boolean; @@ -2221,6 +2242,7 @@ export interface components { notes?: string; personType?: string; familyMember?: boolean; + provisional?: boolean; }; InferredRelationshipWithPersonDTO: { person: 
components["schemas"]["PersonNodeDTO"]; @@ -2396,6 +2418,10 @@ export interface components { thumbnailUrl?: string; /** Format: date */ documentDate?: string; + /** @enum {string} */ + metaDatePrecision: "DAY" | "MONTH" | "SEASON" | "YEAR" | "RANGE" | "APPROX" | "UNKNOWN"; + /** Format: date */ + metaDateEnd?: string; sender?: components["schemas"]["Person"]; receivers: components["schemas"]["Person"][]; tags: components["schemas"]["Tag"][]; diff --git a/frontend/src/lib/shared/dashboard/ReaderRecentDocs.svelte.spec.ts b/frontend/src/lib/shared/dashboard/ReaderRecentDocs.svelte.spec.ts index c13c92b1..c0022274 100644 --- a/frontend/src/lib/shared/dashboard/ReaderRecentDocs.svelte.spec.ts +++ b/frontend/src/lib/shared/dashboard/ReaderRecentDocs.svelte.spec.ts @@ -16,6 +16,7 @@ const baseDoc: Document = { title: 'Brief an Hans', originalFilename: 'brief.pdf', status: 'UPLOADED', + metaDatePrecision: 'UNKNOWN', metadataComplete: true, scriptType: 'HANDWRITING_KURRENT', createdAt: '2025-01-01T12:00:00Z', @@ -127,7 +128,8 @@ describe('ReaderRecentDocs', () => { firstName: 'Anna', displayName: 'Anna Müller', personType: 'PERSON' as const, - familyMember: false + familyMember: false, + provisional: false } }; render(ReaderRecentDocs, { documents: [docWithSender] }); diff --git a/frontend/src/lib/shared/discussion/MentionDropdown.svelte.test.ts b/frontend/src/lib/shared/discussion/MentionDropdown.svelte.test.ts index fcab66b6..849f631c 100644 --- a/frontend/src/lib/shared/discussion/MentionDropdown.svelte.test.ts +++ b/frontend/src/lib/shared/discussion/MentionDropdown.svelte.test.ts @@ -20,6 +20,7 @@ const makePerson = (id: string, name: string, overrides: Partial = {}): displayName: name, personType: 'PERSON', familyMember: false, + provisional: false, ...overrides }; }; diff --git a/frontend/src/lib/shared/discussion/PersonMentionEditor.svelte.spec.ts b/frontend/src/lib/shared/discussion/PersonMentionEditor.svelte.spec.ts index 3b58a62f..9ce23358 100644 --- 
a/frontend/src/lib/shared/discussion/PersonMentionEditor.svelte.spec.ts +++ b/frontend/src/lib/shared/discussion/PersonMentionEditor.svelte.spec.ts @@ -34,6 +34,7 @@ const AUGUSTE: Person = { displayName: 'Auguste Raddatz', personType: 'PERSON', familyMember: false, + provisional: false, birthYear: 1882, deathYear: 1944 }; @@ -45,6 +46,7 @@ const ANNA: Person = { displayName: 'Anna Schmidt', personType: 'PERSON', familyMember: false, + provisional: false, birthYear: 1860 };