diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DatePrecision.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DatePrecision.java
new file mode 100644
index 00000000..e67f17e1
--- /dev/null
+++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DatePrecision.java
@@ -0,0 +1,17 @@
+package org.raddatz.familienarchiv.document;
+
+/**
+ * Precision of a document's date. Verbatim mirror of the import normalizer's {@code Precision}
+ * enum (tools/import-normalizer/dates.py) — the canonical output is the contract, so there is no
+ * translation layer. Do not add, remove, or rename values without also changing the normalizer
+ * (a mismatch silently breaks import idempotency, see ADR-025) and the V69 CHECK chk_meta_date_precision.
+ */
+public enum DatePrecision {
+    DAY, // the V69 backfill assigns this to every pre-existing document that has a meta_date
+    MONTH,
+    SEASON,
+    YEAR,
+    RANGE, // the only precision under which meta_date_end may be non-null (V69 CHECK)
+    APPROX,
+    UNKNOWN // Document's field default; the V69 backfill assigns this to pre-existing documents without a meta_date
+}
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/Document.java b/backend/src/main/java/org/raddatz/familienarchiv/document/Document.java
index 71c6dead..7f702763 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/document/Document.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/document/Document.java
@@ -91,6 +91,29 @@ public class Document {
     @Column(name = "meta_date")
     private LocalDate documentDate; // Wann wurde der Brief geschrieben?
 
+    // Precision of documentDate — drives honest rendering ("ca. 1943", "Frühjahr 1943").
+    // Verbatim mirror of the normalizer's Precision enum (see ADR-025). Persisted as the enum name; V69 makes the column NOT NULL with a CHECK allowlist.
+    @Enumerated(EnumType.STRING)
+    @Column(name = "meta_date_precision", nullable = false, length = 16)
+    @Schema(requiredMode = Schema.RequiredMode.REQUIRED)
+    @Builder.Default
+    private DatePrecision metaDatePrecision = DatePrecision.UNKNOWN;
+
+    // Range end — non-null only when metaDatePrecision is RANGE and never before documentDate (both are V69 CHECKs); open-ended ranges leave it null.
+    @Column(name = "meta_date_end")
+    private LocalDate metaDateEnd;
+
+    // Original date cell, verbatim, preserved for provenance and "as written" display. V69 caps it at 10000 characters.
+    @Column(name = "meta_date_raw", columnDefinition = "TEXT")
+    private String metaDateRaw;
+
+    // Raw attribution preserved even when a person is linked via sender/receivers. V69 caps each of the two texts at 10000 characters.
+    @Column(name = "sender_text", columnDefinition = "TEXT")
+    private String senderText;
+
+    @Column(name = "receiver_text", columnDefinition = "TEXT")
+    private String receiverText;
+
     @Column(name = "meta_location")
     private String location;
 
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/person/Person.java b/backend/src/main/java/org/raddatz/familienarchiv/person/Person.java
index d2332519..993480c4 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/person/Person.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/person/Person.java
@@ -57,6 +57,18 @@ public class Person {
     @Schema(requiredMode = Schema.RequiredMode.REQUIRED)
     private boolean familyMember = false;
 
+    // The normalizer person_id — join key and re-import idempotency key. Null for manually
+    // created persons; unique among non-null values via the V69 index idx_persons_source_ref (see ADR-025).
+    @Column(name = "source_ref")
+    private String sourceRef;
+
+    // A provisional person is one the importer inferred but could not confidently identify.
+    // Distinct from familyMember (a genealogical fact); set true only by the importer (Phase 3). The V69 column default is false as well.
+    @Column(name = "provisional", nullable = false)
+    @Builder.Default
+    @Schema(requiredMode = Schema.RequiredMode.REQUIRED)
+    private boolean provisional = false;
+
     // Entity-graph navigation for JPA JOIN queries (e.g. DocumentSpecifications.hasText).
     // Uses entity relationship rather than cross-domain repository access, avoiding a
     // separate DB roundtrip while respecting domain boundaries.
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/tag/Tag.java b/backend/src/main/java/org/raddatz/familienarchiv/tag/Tag.java
index fc5974a6..32585eed 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/tag/Tag.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/tag/Tag.java
@@ -30,4 +30,11 @@ public class Tag {
 
     /** Color token name (e.g. "sage"), only set on root-level tags. Null means no color. */
     private String color;
+
+    /**
+     * Import identity key, keyed on the canonical tag_path. Null for manually created tags;
+     * unique among non-null values (V69 index idx_tag_source_ref). The importer (Phase 3) uses it for idempotent re-import.
+     */
+    @Column(name = "source_ref")
+    private String sourceRef;
 }
diff --git a/backend/src/main/resources/db/migration/V69__import_precision_attribution_identity_schema.sql b/backend/src/main/resources/db/migration/V69__import_precision_attribution_identity_schema.sql
new file mode 100644
index 00000000..1c621656
--- /dev/null
+++ b/backend/src/main/resources/db/migration/V69__import_precision_attribution_identity_schema.sql
@@ -0,0 +1,64 @@
+-- Phase 2 of "Handling the Unknowns": the schema foundation.
+-- Consolidates every new import/precision/attribution/identity column into ONE
+-- migration with a single owner so downstream phases (importer, rendering, persons
+-- directory) compile against a finished, collision-free schema. See ADR-025.
+--
+-- This file is forward-only and immutable once shipped (Flyway checksum model):
+-- any fix goes in a later version, never an edit here.
+
+-- ─── documents: date precision, range end, raw date, raw attribution ──────────
+
+-- Range end is only set for RANGE precision (open-ended ranges allowed → end may be null).
+ALTER TABLE documents ADD COLUMN meta_date_end date;
+
+-- Original date cell, verbatim, for provenance and "as written" display (Phase 4).
+ALTER TABLE documents ADD COLUMN meta_date_raw text;
+
+-- Raw attribution preserved even when a person is linked.
+ALTER TABLE documents ADD COLUMN sender_text text;
+ALTER TABLE documents ADD COLUMN receiver_text text;
+
+-- Bound user-influenced spreadsheet text at the DB layer (mirrors transcription_blocks
+-- length cap in V18). Defense in depth against malformed/huge import cells. A NULL value passes these CHECKs, so all three columns stay optional.
+ALTER TABLE documents ADD CONSTRAINT chk_meta_date_raw_length CHECK (length(meta_date_raw) <= 10000);
+ALTER TABLE documents ADD CONSTRAINT chk_sender_text_length CHECK (length(sender_text) <= 10000);
+ALTER TABLE documents ADD CONSTRAINT chk_receiver_text_length CHECK (length(receiver_text) <= 10000);
+
+-- Precision enum — added nullable, backfilled (rows with a meta_date become 'DAY', all others 'UNKNOWN'), then made NOT NULL
+-- (in this order so the backfill can populate existing rows before the constraint is enforced).
+ALTER TABLE documents ADD COLUMN meta_date_precision varchar(16);
+
+UPDATE documents
+SET meta_date_precision = CASE WHEN meta_date IS NOT NULL THEN 'DAY' ELSE 'UNKNOWN' END;
+
+ALTER TABLE documents ALTER COLUMN meta_date_precision SET NOT NULL; -- no column DEFAULT is declared: every INSERT must supply a precision
+
+-- Fail-closed allowlist of the seven precision values (verbatim mirror of the
+-- normalizer's Precision enum). The DB enforces validity independent of the Java enum, so a new enum value needs a later migration too.
+ALTER TABLE documents ADD CONSTRAINT chk_meta_date_precision
+    CHECK (meta_date_precision IN ('DAY', 'MONTH', 'SEASON', 'YEAR', 'RANGE', 'APPROX', 'UNKNOWN'));
+
+-- A non-null range end is permitted only when precision = RANGE. A RANGE row MAY have a
+-- null end (open-ended range), so the rule is one-directional, not biconditional.
+ALTER TABLE documents ADD CONSTRAINT chk_meta_date_end_only_for_range
+    CHECK (meta_date_end IS NULL OR meta_date_precision = 'RANGE');
+
+-- For ranges with both endpoints, the end must not precede the start (an end equal to the start is accepted).
+ALTER TABLE documents ADD CONSTRAINT chk_meta_date_end_after_start
+    CHECK (meta_date_end IS NULL OR meta_date IS NULL OR meta_date_end >= meta_date);
+
+-- ─── persons: source_ref (import identity) + provisional flag ─────────────────
+
+-- The normalizer person_id: join key for documents → persons and idempotency key for
+-- re-import. Nullable (manually created persons never have one); unique among non-nulls.
+ALTER TABLE persons ADD COLUMN source_ref varchar(255);
+CREATE UNIQUE INDEX idx_persons_source_ref ON persons (source_ref); -- NOTE(review): many NULL rows coexist only because the unique index treats NULLs as distinct (PostgreSQL default) — confirm target DB
+
+-- A provisional person is one the importer inferred but could not confidently identify.
+-- Stays false until Phase 3 (importer) sets it; no code path writes true in this phase.
+ALTER TABLE persons ADD COLUMN provisional boolean NOT NULL DEFAULT false;
+
+-- ─── tag: source_ref (import identity, keyed on canonical tag_path) ───────────
+
+ALTER TABLE tag ADD COLUMN source_ref varchar(255); -- nullable: per the Tag entity, manually created tags have none
+CREATE UNIQUE INDEX idx_tag_source_ref ON tag (source_ref); -- same NULL handling as idx_persons_source_ref
diff --git a/backend/src/test/java/org/raddatz/familienarchiv/MigrationIntegrationTest.java b/backend/src/test/java/org/raddatz/familienarchiv/MigrationIntegrationTest.java
index 425d0f59..57644ee1 100644
--- a/backend/src/test/java/org/raddatz/familienarchiv/MigrationIntegrationTest.java
+++ b/backend/src/test/java/org/raddatz/familienarchiv/MigrationIntegrationTest.java
@@ -479,6 +479,172 @@ class MigrationIntegrationTest {
         assertThat(count).isEqualTo(1);
     }
 
+    // ─── V69: import/precision/attribution/identity schema foundation ────────
+
+    @Test
+    void v69_metaDatePrecisionColumn_isNotNull() {
+        Integer count = jdbc.queryForObject(
+                """
+                SELECT COUNT(*) FROM information_schema.columns
+                WHERE table_schema = 'public'
+                  AND table_name = 'documents'
+                  AND column_name = 'meta_date_precision'
+                  AND is_nullable = 'NO'
+                """,
+                Integer.class);
+        assertThat(count).isEqualTo(1);
+    }
+
+    @Test
+    void v69_backfillSql_setsDatedRowsToDayPrecision() {
+        // Re-run the migration's backfill UPDATE on a freshly dated row to prove the rule.
+        UUID docId = createDocumentWithDate("1943-05-12");
+
+        jdbc.update(V69_BACKFILL_PRECISION_SQL); // no WHERE clause: rewrites the precision of every documents row
+
+        String precision = jdbc.queryForObject(
+                "SELECT meta_date_precision FROM documents WHERE id = ?", String.class, docId);
+        assertThat(precision).isEqualTo("DAY");
+    }
+
+    @Test
+    void v69_backfillSql_setsUndatedRowsToUnknownPrecision() {
+        UUID docId = createDocument(); // no meta_date
+
+        jdbc.update(V69_BACKFILL_PRECISION_SQL);
+
+        String precision = jdbc.queryForObject(
+                "SELECT meta_date_precision FROM documents WHERE id = ?", String.class, docId);
+        assertThat(precision).isEqualTo("UNKNOWN");
+    }
+
+    // Hand-kept copy of the backfill UPDATE shipped in V69 (the migration file itself is not re-read here); re-running it yields the same values.
+    private static final String V69_BACKFILL_PRECISION_SQL = """
+            UPDATE documents
+            SET meta_date_precision = CASE WHEN meta_date IS NOT NULL THEN 'DAY' ELSE 'UNKNOWN' END
+            """;
+
+    @Test
+    void v69_precisionCheck_rejectsValueOutsideEnum() {
+        UUID docId = createDocument();
+
+        assertThatThrownBy(() ->
+                jdbc.update("UPDATE documents SET meta_date_precision = 'BOGUS' WHERE id = ?", docId)
+        ).isInstanceOf(DataIntegrityViolationException.class);
+    }
+
+    @Test
+    void v69_metaDateEndCheck_rejectsNonNullEndWhenPrecisionNotRange() {
+        UUID docId = createDocumentWithDate("1943-05-12"); // NOTE(review): helper only sets meta_date, so precision is whatever createDocument() assigned — likely UNKNOWN, not DAY; confirm. Either way it is not RANGE.
+
+        assertThatThrownBy(() ->
+                jdbc.update("UPDATE documents SET meta_date_end = '1943-06-01' WHERE id = ?", docId)
+        ).isInstanceOf(DataIntegrityViolationException.class);
+    }
+
+    @Test
+    void v69_metaDateEndCheck_allowsNonNullEndWhenPrecisionRange() {
+        UUID docId = createDocumentWithDate("1943-05-12");
+
+        int rows = jdbc.update(
+                "UPDATE documents SET meta_date_precision = 'RANGE', meta_date_end = '1943-06-01' WHERE id = ?",
+                docId);
+        assertThat(rows).isEqualTo(1);
+    }
+
+    @Test
+    void v69_metaDateEndCheck_allowsRangeWithNullEnd() {
+        // Loose semantics: the normalizer may emit an open-ended RANGE (start only).
+        UUID docId = createDocumentWithDate("1943-05-12");
+
+        int rows = jdbc.update(
+                "UPDATE documents SET meta_date_precision = 'RANGE' WHERE id = ?", docId);
+        assertThat(rows).isEqualTo(1);
+    }
+
+    @Test
+    void v69_rangeOrderCheck_rejectsEndBeforeStart() {
+        UUID docId = createDocumentWithDate("1943-05-12");
+
+        assertThatThrownBy(() ->
+                jdbc.update(
+                        "UPDATE documents SET meta_date_precision = 'RANGE', meta_date_end = '1943-01-01' WHERE id = ?",
+                        docId)
+        ).isInstanceOf(DataIntegrityViolationException.class);
+    }
+
+    @Test
+    void v69_metaDateRawCheck_rejectsOverlongText() {
+        UUID docId = createDocument();
+        String tooLong = "x".repeat(10001); // one character past the 10000 cap of chk_meta_date_raw_length
+
+        assertThatThrownBy(() ->
+                jdbc.update("UPDATE documents SET meta_date_raw = ? WHERE id = ?", tooLong, docId)
+        ).isInstanceOf(DataIntegrityViolationException.class);
+    }
+
+    @Test
+    void v69_senderTextAndReceiverText_storeRawAttribution() {
+        UUID docId = createDocument();
+
+        int rows = jdbc.update(
+                "UPDATE documents SET sender_text = 'Oma Anna', receiver_text = 'Tante Grete' WHERE id = ?",
+                docId);
+        assertThat(rows).isEqualTo(1);
+    }
+
+    @Test
+    @Transactional(propagation = Propagation.NOT_SUPPORTED) // presumably so the expected violation cannot poison a test-managed transaction; rows are then real, hence the cleanup in finally
+    void v69_personsSourceRef_uniqueIndexRejectsDuplicate() {
+        jdbc.update(
+                "INSERT INTO persons (id, last_name, source_ref) VALUES (gen_random_uuid(), 'A', 'person:dup')");
+        try {
+            assertThatThrownBy(() ->
+                    jdbc.update(
+                            "INSERT INTO persons (id, last_name, source_ref) VALUES (gen_random_uuid(), 'B', 'person:dup')")
+            ).isInstanceOf(DataIntegrityViolationException.class);
+        } finally {
+            jdbc.update("DELETE FROM persons WHERE source_ref = 'person:dup'");
+        }
+    }
+
+    @Test
+    @Transactional(propagation = Propagation.NOT_SUPPORTED)
+    void v69_personsSourceRef_allowsMultipleNulls() {
+        UUID a = createPerson("Null", "RefA");
+        UUID b = createPerson("Null", "RefB"); // the second insert succeeding is the actual proof; the NULL assertions below confirm neither row has a source_ref
+        try {
+            String refA = jdbc.queryForObject("SELECT source_ref FROM persons WHERE id = ?", String.class, a);
+            String refB = jdbc.queryForObject("SELECT source_ref FROM persons WHERE id = ?", String.class, b);
+            assertThat(refA).isNull();
+            assertThat(refB).isNull();
+        } finally {
+            jdbc.update("DELETE FROM persons WHERE id IN (?, ?)", a, b);
+        }
+    }
+
+    @Test
+    void v69_personsProvisional_defaultsToFalse() {
+        UUID id = createPerson("Provisional", "Default");
+
+        Boolean provisional = jdbc.queryForObject(
+                "SELECT provisional FROM persons WHERE id = ?", Boolean.class, id);
+        assertThat(provisional).isFalse();
+    }
+
+    @Test
+    @Transactional(propagation = Propagation.NOT_SUPPORTED)
+    void v69_tagSourceRef_uniqueIndexRejectsDuplicate() {
+        jdbc.update("INSERT INTO tag (id, name, source_ref) VALUES (gen_random_uuid(), 'TagDupA', 'tag:dup')");
+        try {
+            assertThatThrownBy(() ->
+                    jdbc.update("INSERT INTO tag (id, name, source_ref) VALUES (gen_random_uuid(), 'TagDupB', 'tag:dup')")
+            ).isInstanceOf(DataIntegrityViolationException.class);
+        } finally {
+            jdbc.update("DELETE FROM tag WHERE source_ref = 'tag:dup'");
+        }
+    }
+
     // ─── helpers ─────────────────────────────────────────────────────────────
 
     private UUID createPerson(String firstName, String lastName) {
@@ -504,6 +670,12 @@ class MigrationIntegrationTest {
         return doc.getId();
     }
 
+    private UUID createDocumentWithDate(String isoDate) { // sets meta_date only; meta_date_precision is left as createDocument() wrote it
+        UUID id = createDocument();
+        jdbc.update("UPDATE documents SET meta_date = ?::date WHERE id = ?", isoDate, id);
+        return id;
+    }
+
     private UUID insertAnnotation(UUID docId) {
         UUID id = UUID.randomUUID();
         jdbc.update("""