feat(schema): one migration + domain model for import/precision/identity (Phase 2, #671) #673

Merged
marcel merged 10 commits from feature/671-schema-foundation into docs/import-migration 2026-05-27 10:13:54 +02:00
25 changed files with 545 additions and 13 deletions

View File

@@ -0,0 +1,17 @@
package org.raddatz.familienarchiv.document;
/**
* Precision of a document's date. Verbatim mirror of the import normalizer's
* {@code Precision} enum (tools/import-normalizer/dates.py) — the canonical output is the
* contract, so there is no translation layer. Do not add, remove, or rename values without
* also changing the normalizer; a mismatch silently breaks import idempotency (see ADR-025).
*/
public enum DatePrecision {
DAY,
MONTH,
SEASON,
YEAR,
RANGE,
APPROX,
UNKNOWN
}

View File

@@ -91,6 +91,29 @@ public class Document {
@Column(name = "meta_date") @Column(name = "meta_date")
private LocalDate documentDate; // Wann wurde der Brief geschrieben? private LocalDate documentDate; // Wann wurde der Brief geschrieben?
// Precision of documentDate — drives honest rendering ("ca. 1943", "Frühjahr 1943").
// Verbatim mirror of the normalizer's Precision enum (see ADR-025).
@Enumerated(EnumType.STRING)
@Column(name = "meta_date_precision", nullable = false, length = 16)
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
@Builder.Default
private DatePrecision metaDatePrecision = DatePrecision.UNKNOWN;
// Range end — only set when metaDatePrecision is RANGE (open-ended ranges allowed → may be null).
@Column(name = "meta_date_end")
private LocalDate metaDateEnd;
// Original date cell, verbatim, preserved for provenance and "as written" display.
@Column(name = "meta_date_raw", columnDefinition = "TEXT")
private String metaDateRaw;
// Raw attribution preserved even when a person is linked via sender/receivers.
@Column(name = "sender_text", columnDefinition = "TEXT")
private String senderText;
@Column(name = "receiver_text", columnDefinition = "TEXT")
private String receiverText;
@Column(name = "meta_location") @Column(name = "meta_location")
private String location; private String location;

View File

@@ -12,6 +12,8 @@ public class DocumentBatchMetadataDTO {
private UUID senderId; private UUID senderId;
private List<UUID> receiverIds; private List<UUID> receiverIds;
private LocalDate documentDate; private LocalDate documentDate;
private DatePrecision metaDatePrecision;
private LocalDate metaDateEnd;
private String location; private String location;
private List<String> tagNames; private List<String> tagNames;
private Boolean metadataComplete; private Boolean metadataComplete;

View File

@@ -18,6 +18,9 @@ public record DocumentListItem(
String originalFilename, String originalFilename,
String thumbnailUrl, String thumbnailUrl,
LocalDate documentDate, LocalDate documentDate,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
DatePrecision metaDatePrecision,
LocalDate metaDateEnd,
Person sender, Person sender,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) @Schema(requiredMode = Schema.RequiredMode.REQUIRED)
List<Person> receivers, List<Person> receivers,

View File

@@ -758,6 +758,8 @@ public class DocumentService {
doc.getOriginalFilename(), doc.getOriginalFilename(),
doc.getThumbnailUrl(), doc.getThumbnailUrl(),
doc.getDocumentDate(), doc.getDocumentDate(),
doc.getMetaDatePrecision(),
doc.getMetaDateEnd(),
doc.getSender(), doc.getSender(),
List.copyOf(doc.getReceivers()), List.copyOf(doc.getReceivers()),
List.copyOf(doc.getTags()), List.copyOf(doc.getTags()),

View File

@@ -11,6 +11,11 @@ import org.raddatz.familienarchiv.ocr.ScriptType;
public class DocumentUpdateDTO { public class DocumentUpdateDTO {
private String title; private String title;
private LocalDate documentDate; private LocalDate documentDate;
private DatePrecision metaDatePrecision;
private LocalDate metaDateEnd;
private String metaDateRaw;
private String senderText;
private String receiverText;
private String location; private String location;
private String documentLocation; private String documentLocation;
private String archiveBox; private String archiveBox;

View File

@@ -57,6 +57,18 @@ public class Person {
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) @Schema(requiredMode = Schema.RequiredMode.REQUIRED)
private boolean familyMember = false; private boolean familyMember = false;
// The normalizer person_id — join key and re-import idempotency key. Null for manually
// created persons; unique among non-null values (see ADR-025).
@Column(name = "source_ref")
private String sourceRef;
// A provisional person is one the importer inferred but could not confidently identify.
// Distinct from familyMember (a genealogical fact); set true only by the importer (Phase 3).
@Column(name = "provisional", nullable = false)
@Builder.Default
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
private boolean provisional = false;
// Entity-graph navigation for JPA JOIN queries (e.g. DocumentSpecifications.hasText). // Entity-graph navigation for JPA JOIN queries (e.g. DocumentSpecifications.hasText).
// Uses entity relationship rather than cross-domain repository access, avoiding a // Uses entity relationship rather than cross-domain repository access, avoiding a
// separate DB roundtrip while respecting domain boundaries. // separate DB roundtrip while respecting domain boundaries.

View File

@@ -41,7 +41,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName, SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
p.person_type AS personType, p.person_type AS personType,
p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes, p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
p.family_member AS familyMember, p.family_member AS familyMember, p.provisional AS provisional,
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id) (SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount + (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
FROM persons p FROM persons p
@@ -54,7 +54,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName, SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
p.person_type AS personType, p.person_type AS personType,
p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes, p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
p.family_member AS familyMember, p.family_member AS familyMember, p.provisional AS provisional,
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id) (SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount + (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
FROM persons p FROM persons p
@@ -63,7 +63,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
OR LOWER(CONCAT(p.last_name,' ',COALESCE(p.first_name,''))) LIKE LOWER(CONCAT('%',:query,'%')) OR LOWER(CONCAT(p.last_name,' ',COALESCE(p.first_name,''))) LIKE LOWER(CONCAT('%',:query,'%'))
OR LOWER(p.alias) LIKE LOWER(CONCAT('%',:query,'%')) OR LOWER(p.alias) LIKE LOWER(CONCAT('%',:query,'%'))
OR LOWER(a.last_name) LIKE LOWER(CONCAT('%',:query,'%')) OR LOWER(a.last_name) LIKE LOWER(CONCAT('%',:query,'%'))
GROUP BY p.id, p.title, p.first_name, p.last_name, p.person_type, p.alias, p.birth_year, p.death_year, p.notes, p.family_member GROUP BY p.id, p.title, p.first_name, p.last_name, p.person_type, p.alias, p.birth_year, p.death_year, p.notes, p.family_member, p.provisional
ORDER BY p.last_name ASC, p.first_name ASC ORDER BY p.last_name ASC, p.first_name ASC
""", """,
nativeQuery = true) nativeQuery = true)
@@ -75,7 +75,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName, SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
p.person_type AS personType, p.person_type AS personType,
p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes, p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
p.family_member AS familyMember, p.family_member AS familyMember, p.provisional AS provisional,
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id) (SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount + (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
FROM persons p FROM persons p

View File

@@ -18,6 +18,7 @@ public interface PersonSummaryDTO {
Integer getDeathYear(); Integer getDeathYear();
String getNotes(); String getNotes();
boolean isFamilyMember(); boolean isFamilyMember();
boolean isProvisional();
long getDocumentCount(); long getDocumentCount();
default String getDisplayName() { default String getDisplayName() {

View File

@@ -30,4 +30,11 @@ public class Tag {
/** Color token name (e.g. "sage"), only set on root-level tags. Null means no color. */ /** Color token name (e.g. "sage"), only set on root-level tags. Null means no color. */
private String color; private String color;
/**
* Import identity key, keyed on the canonical tag_path. Null for manually created tags;
* unique among non-null values. The importer (Phase 3) uses it for idempotent re-import.
*/
@Column(name = "source_ref")
private String sourceRef;
} }

View File

@@ -0,0 +1,67 @@
-- Phase 2 of "Handling the Unknowns": the schema foundation.
-- Consolidates every new import/precision/attribution/identity column into ONE
-- migration with a single owner so downstream phases (importer, rendering, persons
-- directory) compile against a finished, collision-free schema. See ADR-025.
--
-- This file is forward-only and immutable once shipped (Flyway checksum model):
-- any fix goes in a later version, never an edit here.
-- ─── documents: date precision, range end, raw date, raw attribution ──────────
-- Range end is only set for RANGE precision (open-ended ranges allowed → end may be null).
ALTER TABLE documents ADD COLUMN meta_date_end date;
-- Original date cell, verbatim, for provenance and "as written" display (Phase 4).
ALTER TABLE documents ADD COLUMN meta_date_raw text;
-- Raw attribution preserved even when a person is linked.
ALTER TABLE documents ADD COLUMN sender_text text;
ALTER TABLE documents ADD COLUMN receiver_text text;
-- Bound user-influenced spreadsheet text at the DB layer (mirrors transcription_blocks
-- length cap in V18). Defense in depth against malformed/huge import cells.
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_raw_length CHECK (length(meta_date_raw) <= 10000);
ALTER TABLE documents ADD CONSTRAINT chk_sender_text_length CHECK (length(sender_text) <= 10000);
ALTER TABLE documents ADD CONSTRAINT chk_receiver_text_length CHECK (length(receiver_text) <= 10000);
-- Precision enum — added with a DB default of 'UNKNOWN', backfilled, then made NOT NULL.
-- The DEFAULT serves two purposes: (1) existing rows get 'UNKNOWN' immediately, and
-- (2) raw-SQL inserts that omit the column (test fixtures, ad-hoc data loads) get a sane,
-- CHECK-valid value instead of violating the NOT NULL constraint. JPA saves still set it
-- explicitly via the entity's @Builder.Default = DatePrecision.UNKNOWN.
ALTER TABLE documents ADD COLUMN meta_date_precision varchar(16) DEFAULT 'UNKNOWN';
UPDATE documents
SET meta_date_precision = CASE WHEN meta_date IS NOT NULL THEN 'DAY' ELSE 'UNKNOWN' END;
ALTER TABLE documents ALTER COLUMN meta_date_precision SET NOT NULL;
-- Fail-closed allowlist of the seven precision values (verbatim mirror of the
-- normalizer's Precision enum). The DB enforces validity independent of the Java enum.
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_precision
CHECK (meta_date_precision IN ('DAY', 'MONTH', 'SEASON', 'YEAR', 'RANGE', 'APPROX', 'UNKNOWN'));
-- A non-null range end is permitted only when precision = RANGE. A RANGE row MAY have a
-- null end (open-ended range), so the rule is one-directional, not biconditional.
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_end_only_for_range
CHECK (meta_date_end IS NULL OR meta_date_precision = 'RANGE');
-- For ranges with both endpoints, the end must not precede the start.
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_end_after_start
CHECK (meta_date_end IS NULL OR meta_date IS NULL OR meta_date_end >= meta_date);
-- ─── persons: source_ref (import identity) + provisional flag ─────────────────
-- The normalizer person_id: join key for documents → persons and idempotency key for
-- re-import. Nullable (manually created persons never have one); unique among non-nulls.
ALTER TABLE persons ADD COLUMN source_ref varchar(255);
CREATE UNIQUE INDEX idx_persons_source_ref ON persons (source_ref);
-- A provisional person is one the importer inferred but could not confidently identify.
-- Stays false until Phase 3 (importer) sets it; no code path writes true in this phase.
ALTER TABLE persons ADD COLUMN provisional boolean NOT NULL DEFAULT false;
-- ─── tag: source_ref (import identity, keyed on canonical tag_path) ───────────
ALTER TABLE tag ADD COLUMN source_ref varchar(255);
CREATE UNIQUE INDEX idx_tag_source_ref ON tag (source_ref);

View File

@@ -479,6 +479,191 @@ class MigrationIntegrationTest {
assertThat(count).isEqualTo(1); assertThat(count).isEqualTo(1);
} }
// ─── V69: import/precision/attribution/identity schema foundation ────────
@Test
void v69_metaDatePrecisionColumn_isNotNull() {
Integer count = jdbc.queryForObject(
"""
SELECT COUNT(*) FROM information_schema.columns
WHERE table_schema = 'public'
AND table_name = 'documents'
AND column_name = 'meta_date_precision'
AND is_nullable = 'NO'
""",
Integer.class);
assertThat(count).isEqualTo(1);
}
@Test
void v69_backfillSql_setsDatedRowsToDayPrecision() {
// Re-run the migration's backfill UPDATE on a freshly dated row to prove the rule.
UUID docId = createDocumentWithDate("1943-05-12");
jdbc.update(V69_BACKFILL_PRECISION_SQL);
String precision = jdbc.queryForObject(
"SELECT meta_date_precision FROM documents WHERE id = ?", String.class, docId);
assertThat(precision).isEqualTo("DAY");
}
@Test
void v69_backfillSql_setsUndatedRowsToUnknownPrecision() {
UUID docId = createDocument(); // no meta_date
jdbc.update(V69_BACKFILL_PRECISION_SQL);
String precision = jdbc.queryForObject(
"SELECT meta_date_precision FROM documents WHERE id = ?", String.class, docId);
assertThat(precision).isEqualTo("UNKNOWN");
}
// Mirrors the backfill UPDATE shipped in V69; idempotent for verification.
private static final String V69_BACKFILL_PRECISION_SQL = """
UPDATE documents
SET meta_date_precision = CASE WHEN meta_date IS NOT NULL THEN 'DAY' ELSE 'UNKNOWN' END
""";
@Test
void v69_precisionCheck_rejectsValueOutsideEnum() {
UUID docId = createDocument();
assertThatThrownBy(() ->
jdbc.update("UPDATE documents SET meta_date_precision = 'BOGUS' WHERE id = ?", docId)
).isInstanceOf(DataIntegrityViolationException.class);
}
@Test
void v69_metaDateEndCheck_rejectsNonNullEndWhenPrecisionNotRange() {
UUID docId = createDocumentWithDate("1943-05-12"); // precision DAY
assertThatThrownBy(() ->
jdbc.update("UPDATE documents SET meta_date_end = '1943-06-01' WHERE id = ?", docId)
).isInstanceOf(DataIntegrityViolationException.class);
}
@Test
void v69_metaDateEndCheck_allowsNonNullEndWhenPrecisionRange() {
UUID docId = createDocumentWithDate("1943-05-12");
int rows = jdbc.update(
"UPDATE documents SET meta_date_precision = 'RANGE', meta_date_end = '1943-06-01' WHERE id = ?",
docId);
assertThat(rows).isEqualTo(1);
}
@Test
void v69_metaDateEndCheck_allowsRangeWithNullEnd() {
// Loose semantics: the normalizer may emit an open-ended RANGE (start only).
UUID docId = createDocumentWithDate("1943-05-12");
int rows = jdbc.update(
"UPDATE documents SET meta_date_precision = 'RANGE' WHERE id = ?", docId);
assertThat(rows).isEqualTo(1);
}
@Test
void v69_metaDateEndCheck_allowsRangeWithBothEndpointsNull() {
// Fully-open RANGE: neither start (meta_date) nor end (meta_date_end) is set.
// Both CHECKs hold (end IS NULL passes chk_meta_date_end_only_for_range; both-null
// passes chk_meta_date_end_after_start), so the row survives. This locks the actual
// DB behavior so a future tightening to a biconditional rule is a deliberate change.
UUID docId = createDocument(); // null meta_date
int rows = jdbc.update(
"UPDATE documents SET meta_date_precision = 'RANGE' WHERE id = ?", docId);
assertThat(rows).isEqualTo(1);
Object metaDate = jdbc.queryForObject("SELECT meta_date FROM documents WHERE id = ?", Object.class, docId);
Object metaDateEnd = jdbc.queryForObject(
"SELECT meta_date_end FROM documents WHERE id = ?", Object.class, docId);
assertThat(metaDate).isNull();
assertThat(metaDateEnd).isNull();
}
@Test
void v69_rangeOrderCheck_rejectsEndBeforeStart() {
UUID docId = createDocumentWithDate("1943-05-12");
assertThatThrownBy(() ->
jdbc.update(
"UPDATE documents SET meta_date_precision = 'RANGE', meta_date_end = '1943-01-01' WHERE id = ?",
docId)
).isInstanceOf(DataIntegrityViolationException.class);
}
@Test
void v69_metaDateRawCheck_rejectsOverlongText() {
UUID docId = createDocument();
String tooLong = "x".repeat(10001);
assertThatThrownBy(() ->
jdbc.update("UPDATE documents SET meta_date_raw = ? WHERE id = ?", tooLong, docId)
).isInstanceOf(DataIntegrityViolationException.class);
}
@Test
void v69_senderTextAndReceiverText_storeRawAttribution() {
UUID docId = createDocument();
int rows = jdbc.update(
"UPDATE documents SET sender_text = 'Oma Anna', receiver_text = 'Tante Grete' WHERE id = ?",
docId);
assertThat(rows).isEqualTo(1);
}
@Test
@Transactional(propagation = Propagation.NOT_SUPPORTED)
void v69_personsSourceRef_uniqueIndexRejectsDuplicate() {
jdbc.update(
"INSERT INTO persons (id, last_name, source_ref) VALUES (gen_random_uuid(), 'A', 'person:dup')");
try {
assertThatThrownBy(() ->
jdbc.update(
"INSERT INTO persons (id, last_name, source_ref) VALUES (gen_random_uuid(), 'B', 'person:dup')")
).isInstanceOf(DataIntegrityViolationException.class);
} finally {
jdbc.update("DELETE FROM persons WHERE source_ref = 'person:dup'");
}
}
@Test
@Transactional(propagation = Propagation.NOT_SUPPORTED)
void v69_personsSourceRef_allowsMultipleNulls() {
UUID a = createPerson("Null", "RefA");
UUID b = createPerson("Null", "RefB");
try {
String refA = jdbc.queryForObject("SELECT source_ref FROM persons WHERE id = ?", String.class, a);
String refB = jdbc.queryForObject("SELECT source_ref FROM persons WHERE id = ?", String.class, b);
assertThat(refA).isNull();
assertThat(refB).isNull();
} finally {
jdbc.update("DELETE FROM persons WHERE id IN (?, ?)", a, b);
}
}
@Test
void v69_personsProvisional_defaultsToFalse() {
UUID id = createPerson("Provisional", "Default");
Boolean provisional = jdbc.queryForObject(
"SELECT provisional FROM persons WHERE id = ?", Boolean.class, id);
assertThat(provisional).isFalse();
}
@Test
@Transactional(propagation = Propagation.NOT_SUPPORTED)
void v69_tagSourceRef_uniqueIndexRejectsDuplicate() {
jdbc.update("INSERT INTO tag (id, name, source_ref) VALUES (gen_random_uuid(), 'TagDupA', 'tag:dup')");
try {
assertThatThrownBy(() ->
jdbc.update("INSERT INTO tag (id, name, source_ref) VALUES (gen_random_uuid(), 'TagDupB', 'tag:dup')")
).isInstanceOf(DataIntegrityViolationException.class);
} finally {
jdbc.update("DELETE FROM tag WHERE source_ref = 'tag:dup'");
}
}
// ─── helpers ───────────────────────────────────────────────────────────── // ─── helpers ─────────────────────────────────────────────────────────────
private UUID createPerson(String firstName, String lastName) { private UUID createPerson(String firstName, String lastName) {
@@ -504,6 +689,12 @@ class MigrationIntegrationTest {
return doc.getId(); return doc.getId();
} }
private UUID createDocumentWithDate(String isoDate) {
UUID id = createDocument();
jdbc.update("UPDATE documents SET meta_date = ?::date WHERE id = ?", isoDate, id);
return id;
}
private UUID insertAnnotation(UUID docId) { private UUID insertAnnotation(UUID docId) {
UUID id = UUID.randomUUID(); UUID id = UUID.randomUUID();
jdbc.update(""" jdbc.update("""

View File

@@ -133,7 +133,8 @@ class DocumentControllerTest {
"Er schrieb einen langen Brief", List.of(), false, List.of(), List.of(), List.of(), null, List.of()); "Er schrieb einen langen Brief", List.of(), false, List.of(), List.of(), List.of(), null, List.of());
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any())) when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
.thenReturn(DocumentSearchResult.of(List.of(new DocumentListItem( .thenReturn(DocumentSearchResult.of(List.of(new DocumentListItem(
docId, "Brief an Anna", "brief.pdf", null, null, null, docId, "Brief an Anna", "brief.pdf", null, null,
DatePrecision.UNKNOWN, null, null,
List.of(), List.of(), null, null, null, null, List.of(), List.of(), null, null, null, null,
0, List.of(), matchData)))); 0, List.of(), matchData))));
@@ -151,7 +152,8 @@ class DocumentControllerTest {
var matchData = new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of(), null, List.of()); var matchData = new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of(), null, List.of());
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any())) when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
.thenReturn(DocumentSearchResult.of(List.of(new DocumentListItem( .thenReturn(DocumentSearchResult.of(List.of(new DocumentListItem(
docId, "Brief an Anna", "brief.pdf", null, null, null, docId, "Brief an Anna", "brief.pdf", null, null,
DatePrecision.UNKNOWN, null, null,
List.of(), List.of(), null, null, null, null, List.of(), List.of(), null, null, null, null,
0, List.of(), matchData)))); 0, List.of(), matchData))));

View File

@@ -81,6 +81,28 @@ class DocumentListItemIntegrationTest {
assertThat(item.title()).isEqualTo("Kurrent Brief"); assertThat(item.title()).isEqualTo("Kurrent Brief");
} }
@Test
void search_listItem_carriesMetaDatePrecisionAndEnd() {
documentRepository.save(Document.builder()
.title("Range Brief")
.originalFilename("range.pdf")
.status(DocumentStatus.UPLOADED)
.documentDate(java.time.LocalDate.of(1943, 1, 1))
.metaDatePrecision(DatePrecision.RANGE)
.metaDateEnd(java.time.LocalDate.of(1943, 12, 31))
.build());
DocumentSearchResult result = documentService.searchDocuments(
null, null, null, null, null, null, null, null,
DocumentSort.DATE, "DESC", null,
PageRequest.of(0, 50));
DocumentListItem item = result.items().stream()
.filter(i -> i.title().equals("Range Brief")).findFirst().orElseThrow();
assertThat(item.metaDatePrecision()).isEqualTo(DatePrecision.RANGE);
assertThat(item.metaDateEnd()).isEqualTo(java.time.LocalDate.of(1943, 12, 31));
}
@Test @Test
void detail_stillReturnsTrainingLabels() { void detail_stillReturnsTrainingLabels() {
Document saved = documentRepository.save(Document.builder() Document saved = documentRepository.save(Document.builder()

View File

@@ -14,7 +14,8 @@ class DocumentSearchResultTest {
private DocumentListItem item(UUID docId) { private DocumentListItem item(UUID docId) {
return new DocumentListItem( return new DocumentListItem(
docId, "Test", "test.pdf", null, null, null, docId, "Test", "test.pdf", null, null,
DatePrecision.UNKNOWN, null, null,
List.of(), List.of(), null, null, null, null, List.of(), List.of(), null, null, null, null,
0, List.of(), SearchMatchData.empty()); 0, List.of(), SearchMatchData.empty());
} }
@@ -64,7 +65,8 @@ class DocumentSearchResultTest {
UUID id = UUID.randomUUID(); UUID id = UUID.randomUUID();
ActivityActorDTO actor = new ActivityActorDTO("AB", "#f00", "Anna Braun"); ActivityActorDTO actor = new ActivityActorDTO("AB", "#f00", "Anna Braun");
DocumentListItem item = new DocumentListItem( DocumentListItem item = new DocumentListItem(
id, "T", "t.pdf", null, null, null, id, "T", "t.pdf", null, null,
DatePrecision.UNKNOWN, null, null,
List.of(), List.of(), null, null, null, null, List.of(), List.of(), null, null, null, null,
75, List.of(actor), SearchMatchData.empty()); 75, List.of(actor), SearchMatchData.empty());

View File

@@ -117,6 +117,7 @@ class PersonControllerTest {
public Integer getDeathYear() { return null; } public Integer getDeathYear() { return null; }
public String getNotes() { return null; } public String getNotes() { return null; }
public boolean isFamilyMember() { return false; } public boolean isFamilyMember() { return false; }
public boolean isProvisional() { return false; }
public long getDocumentCount() { return 0; } public long getDocumentCount() { return 0; }
}; };
} }

View File

@@ -463,4 +463,46 @@ class PersonRepositoryTest {
assertThat(result).hasSize(1); assertThat(result).hasSize(1);
assertThat(result.get(0).getLastName()).isEqualTo("Gesellschafter des Verlages"); assertThat(result.get(0).getLastName()).isEqualTo("Gesellschafter des Verlages");
} }
// ─── #671: provisional must be SELECTed in all three native projections ───
// Adding isProvisional() to the interface compiles even if a native query forgets
// to SELECT p.provisional — it then silently returns false. These tests are the only
// guard against that trap, so they must run against real Postgres.
@Test
void findAllWithDocumentCount_projectsProvisionalTrue() {
personRepository.save(Person.builder()
.firstName("Inferred").lastName("Person").provisional(true).build());
List<PersonSummaryDTO> result = personRepository.findAllWithDocumentCount();
assertThat(result).anyMatch(PersonSummaryDTO::isProvisional);
}
@Test
void searchWithDocumentCount_projectsProvisionalTrue() {
personRepository.save(Person.builder()
.firstName("Provisorisch").lastName("Müller").provisional(true).build());
List<PersonSummaryDTO> result = personRepository.searchWithDocumentCount("Provisorisch");
assertThat(result).hasSize(1);
assertThat(result.get(0).isProvisional()).isTrue();
}
@Test
void findTopByDocumentCount_projectsProvisionalTrue() {
Person provisional = personRepository.save(Person.builder()
.firstName("Top").lastName("Provisional").provisional(true).build());
documentRepository.save(Document.builder()
.title("Brief").originalFilename("b.pdf")
.status(DocumentStatus.UPLOADED)
.sender(provisional).build());
List<PersonSummaryDTO> result = personRepository.findTopByDocumentCount(10);
PersonSummaryDTO summary = result.stream()
.filter(p -> p.getId().equals(provisional.getId())).findFirst().orElseThrow();
assertThat(summary.isProvisional()).isTrue();
}
} }

View File

@@ -25,6 +25,11 @@ _Not to be confused with [AppUser](#appuser-appuser)_ — `Person` is a historic
**UserGroup** (`UserGroup`) — a named permission bundle assigned to one or more `AppUser`s. A user's effective permissions are the union of all permissions across all groups they belong to. **UserGroup** (`UserGroup`) — a named permission bundle assigned to one or more `AppUser`s. A user's effective permissions are the union of all permissions across all groups they belong to.
**source_ref** (`Person.sourceRef`, `Tag.sourceRef`) — the import normalizer's stable identity for a `Person` (its `person_id`) or `Tag` (its canonical `tag_path`). It is the join key linking normalized records to documents and the idempotency key for re-import; null for manually created records and unique among non-null values.
**provisional person** (`Person.provisional`) — a `Person` the importer inferred from raw attribution text but could not confidently match to a known individual. The flag lets the persons directory surface uncertainty honestly rather than fabricate a confident identity; it defaults to `false` and is set `true` only by the importer.
_Not to be confused with `family_member`_ — `provisional` expresses import confidence, while `family_member` is a genealogical fact about whether the person belongs to the family tree.
--- ---
## Document-Related Terms ## Document-Related Terms
@@ -36,6 +41,10 @@ _See also [TranscriptionBlock](#transcriptionblock-transcriptionblock)._
**Document** (`Document`) — a single archival item (letter, postcard, photograph) with a file stored in MinIO/S3 and associated metadata (sender, receivers, date, tags, transcription blocks). **Document** (`Document`) — a single archival item (letter, postcard, photograph) with a file stored in MinIO/S3 and associated metadata (sender, receivers, date, tags, transcription blocks).
**date precision** (`Document.metaDatePrecision`, enum `DatePrecision`) — how exactly a document's date is known, one of `DAY, MONTH, SEASON, YEAR, RANGE, APPROX, UNKNOWN`. A verbatim mirror of the import normalizer's `Precision` enum so honest dates can be rendered (`APPROX` → "ca.", `RANGE` uses `meta_date_end`) instead of fabricating a false `DAY`-level date. `UNKNOWN` is the explicit value for undated documents.
**raw attribution** (`Document.senderText`, `Document.receiverText`, `Document.metaDateRaw`) — the original spreadsheet cell text for a document's sender, receiver, and date, preserved verbatim even after a `Person` or normalized date is linked. It keeps provenance intact and enables an "as written in the original" view.
**DocumentVersion** (`DocumentVersion`) — an append-only snapshot of a `Document`'s metadata at a point in time. Append-only by convention; no consumer-facing create or update endpoint exists. The entity uses Lombok `@Data` (which generates setters), so immutability is enforced by application convention, not at the Java level. **DocumentVersion** (`DocumentVersion`) — an append-only snapshot of a `Document`'s metadata at a point in time. Append-only by convention; no consumer-facing create or update endpoint exists. The entity uses Lombok `@Data` (which generates setters), so immutability is enforced by application convention, not at the Java level.
**Tag** (`Tag`) — a hierarchical category that can be applied to `Document`s. Tags are self-referencing via a `parent_id` foreign key, forming a tree structure. **Tag** (`Tag`) — a hierarchical category that can be applied to `Document`s. Tags are self-referencing via a `parent_id` foreign key, forming a tree structure.

View File

@@ -0,0 +1,83 @@
# ADR-025 — Canonical Import Output as Contract & Single-Migration Schema Foundation
**Date:** 2026-05-27
**Status:** Accepted
**Issue:** #671
**Milestone:** Handling the Unknowns — honest uncertainty in dates & people
---
## Context
The "Handling the Unknowns" milestone introduces honest uncertainty into the archive:
documents whose dates are known only approximately or as a range, and people the importer
infers from raw attribution text but cannot confidently identify. Three sibling issues —
date precision (#666), name triage (#665), and the importer (#669) — each independently
planned a Flyway `V69` migration that altered `persons`. Three `V69`s is a boot failure
(Flyway versions must be unique), and `persons.provisional` was at risk of being defined
twice.
Two durable decisions had to be made before any application code in Phases 36 could
compile against the new schema.
---
## Decision
### 1. All import/precision/attribution/identity schema lives in ONE migration with a single owner
`V69__import_precision_attribution_identity_schema.sql` adds every new column for this
milestone in a single, atomic, forward-only migration:
- `documents`: `meta_date_precision` (backfilled `DAY` where dated / `UNKNOWN` where not,
then `NOT NULL`), `meta_date_end`, `meta_date_raw`, `sender_text`, `receiver_text`.
- `persons`: `source_ref` (unique index, nullable), `provisional` (`NOT NULL DEFAULT false`).
- `tag`: `source_ref` (unique index, nullable).
Integrity is pushed to the database as fail-closed `CHECK` constraints (the precedent is
`V22`'s `person_type` allowlist):
- `meta_date_precision` must be one of the seven enum values.
- `meta_date_end` may be non-null **only** when precision = `RANGE` (one-directional, not
biconditional — see Consequences).
- `meta_date_end >= meta_date` for ranges with both endpoints (a `CHECK`, not a trigger).
- `meta_date_raw`, `sender_text`, `receiver_text` are length-capped at 10 000 (mirrors the
`transcription_blocks` cap in `V18`).
No sibling issue adds another migration that alters `persons` or `documents` in this
milestone.
### 2. The backend `DatePrecision` enum is a verbatim mirror of the normalizer's `Precision`; the canonical output is the contract
The importer reads the Python normalizer's canonical output
(`tools/import-normalizer/`). The backend `DatePrecision` enum
(`DAY, MONTH, SEASON, YEAR, RANGE, APPROX, UNKNOWN`) is a verbatim copy of the normalizer's
`Precision(StrEnum)` (`dates.py`). There is **no translation layer**: the normalizer's
output strings are persisted as-is. The same applies to `source_ref`, which carries the
normalizer's `person_id` / canonical `tag_path` unchanged as the re-import idempotency key.
---
## Consequences
- **RANGE is one-directional, not biconditional.** A `RANGE` row may have a null
`meta_date_end` (an open-ended range with only a start), because the normalizer can emit
start-only ranges. A biconditional `RANGE ⟺ end IS NOT NULL` rule would reject valid
normalizer output, so it was rejected. Phase 4 rendering must handle a `RANGE` with no end
gracefully.
- **`provisional` stays `false` throughout this phase.** The column and flag exist, but no
code path sets it `true`; the importer (Phase 3) is the only writer. This is intentional,
not a half-built feature.
- **A future dev must not "improve" the enum.** Renaming or dropping a `DatePrecision` value
without changing the normalizer silently breaks import idempotency and date rendering. The
enum's Javadoc states this; the DB `CHECK` enforces validity independent of the Java enum.
- **`source_ref` is unique + nullable.** Manually created persons/tags have `source_ref =
NULL`; Postgres allows multiple NULLs under a plain unique index, so no backfill is needed.
- **Forward-only.** The migration is immutable once shipped (Flyway checksum model); any fix
goes in a later version. There is no down-migration — rollback means restoring from the
nightly `pg_dump`, the standard procedure.
- **`PersonSummaryDTO` coupling.** `provisional` was added to the `PersonSummaryDTO` native
interface projection; because the projection is backed by native SQL, the column had to be
added to all three native `SELECT`s (`findAllWithDocumentCount`, `searchWithDocumentCount`,
`findTopByDocumentCount`) or it would silently return `false`. Guarded by integration tests
against real Postgres.

View File

@@ -1,6 +1,6 @@
@startuml db-orm @startuml db-orm
' Schema source: Flyway V1V60 (excl. V37, V43 — intentionally removed) ' Schema source: Flyway V1V69 (excl. V37, V43 — intentionally removed)
' Schema as of: V60 (2026-05-06) ' Schema as of: V69 (2026-05-27)
' ⚠ This is a versioned snapshot. Update when the schema changes significantly. ' ⚠ This is a versioned snapshot. Update when the schema changes significantly.
hide circle hide circle
@@ -88,6 +88,11 @@ package "Documents" {
summary : TEXT summary : TEXT
transcription : TEXT transcription : TEXT
meta_date : DATE meta_date : DATE
meta_date_precision : VARCHAR(16) NOT NULL
meta_date_end : DATE
meta_date_raw : TEXT
sender_text : TEXT
receiver_text : TEXT
meta_location : VARCHAR(255) meta_location : VARCHAR(255)
meta_document_location : VARCHAR(255) meta_document_location : VARCHAR(255)
archive_box : VARCHAR(255) archive_box : VARCHAR(255)
@@ -182,6 +187,8 @@ package "Persons" {
birth_year : INTEGER birth_year : INTEGER
death_year : INTEGER death_year : INTEGER
family_member : BOOLEAN NOT NULL family_member : BOOLEAN NOT NULL
source_ref : VARCHAR(255) UNIQUE
provisional : BOOLEAN NOT NULL
} }
entity person_name_aliases { entity person_name_aliases {
@@ -217,6 +224,7 @@ package "Tags" {
name : VARCHAR(255) NOT NULL UNIQUE name : VARCHAR(255) NOT NULL UNIQUE
parent_id : UUID <<FK>> parent_id : UUID <<FK>>
color : VARCHAR(20) color : VARCHAR(20)
source_ref : VARCHAR(255) UNIQUE
} }
} }

View File

@@ -1,7 +1,9 @@
@startuml db-relationships @startuml db-relationships
' Schema source: Flyway V1V60 (excl. V37, V43 — intentionally removed) ' Schema source: Flyway V1V69 (excl. V37, V43 — intentionally removed)
' Schema as of: V60 (2026-05-06) ' Schema as of: V69 (2026-05-27)
' ⚠ This is a versioned snapshot. Update when the schema changes significantly. ' ⚠ This is a versioned snapshot. Update when the schema changes significantly.
' Note: V69 adds columns only (persons.source_ref, tag.source_ref, document
' precision/attribution fields); no new FK relationships, so this diagram is unchanged.
hide circle hide circle
skinparam linetype ortho skinparam linetype ortho

View File

@@ -1636,6 +1636,7 @@ export interface components {
/** Format: uuid */ /** Format: uuid */
parentId?: string; parentId?: string;
color?: string; color?: string;
sourceRef?: string;
}; };
PersonUpdateDTO: { PersonUpdateDTO: {
/** @enum {string} */ /** @enum {string} */
@@ -1665,12 +1666,21 @@ export interface components {
/** Format: int32 */ /** Format: int32 */
deathYear?: number; deathYear?: number;
familyMember: boolean; familyMember: boolean;
sourceRef?: string;
provisional: boolean;
readonly displayName: string; readonly displayName: string;
}; };
DocumentUpdateDTO: { DocumentUpdateDTO: {
title?: string; title?: string;
/** Format: date */ /** Format: date */
documentDate?: string; documentDate?: string;
/** @enum {string} */
metaDatePrecision?: "DAY" | "MONTH" | "SEASON" | "YEAR" | "RANGE" | "APPROX" | "UNKNOWN";
/** Format: date */
metaDateEnd?: string;
metaDateRaw?: string;
senderText?: string;
receiverText?: string;
location?: string; location?: string;
documentLocation?: string; documentLocation?: string;
archiveBox?: string; archiveBox?: string;
@@ -1704,6 +1714,13 @@ export interface components {
status: "PLACEHOLDER" | "UPLOADED" | "TRANSCRIBED" | "REVIEWED" | "ARCHIVED"; status: "PLACEHOLDER" | "UPLOADED" | "TRANSCRIBED" | "REVIEWED" | "ARCHIVED";
/** Format: date */ /** Format: date */
documentDate?: string; documentDate?: string;
/** @enum {string} */
metaDatePrecision: "DAY" | "MONTH" | "SEASON" | "YEAR" | "RANGE" | "APPROX" | "UNKNOWN";
/** Format: date */
metaDateEnd?: string;
metaDateRaw?: string;
senderText?: string;
receiverText?: string;
location?: string; location?: string;
documentLocation?: string; documentLocation?: string;
archiveBox?: string; archiveBox?: string;
@@ -2024,6 +2041,10 @@ export interface components {
receiverIds?: string[]; receiverIds?: string[];
/** Format: date */ /** Format: date */
documentDate?: string; documentDate?: string;
/** @enum {string} */
metaDatePrecision?: "DAY" | "MONTH" | "SEASON" | "YEAR" | "RANGE" | "APPROX" | "UNKNOWN";
/** Format: date */
metaDateEnd?: string;
location?: string; location?: string;
tagNames?: string[]; tagNames?: string[];
metadataComplete?: boolean; metadataComplete?: boolean;
@@ -2221,6 +2242,7 @@ export interface components {
notes?: string; notes?: string;
personType?: string; personType?: string;
familyMember?: boolean; familyMember?: boolean;
provisional?: boolean;
}; };
InferredRelationshipWithPersonDTO: { InferredRelationshipWithPersonDTO: {
person: components["schemas"]["PersonNodeDTO"]; person: components["schemas"]["PersonNodeDTO"];
@@ -2396,6 +2418,10 @@ export interface components {
thumbnailUrl?: string; thumbnailUrl?: string;
/** Format: date */ /** Format: date */
documentDate?: string; documentDate?: string;
/** @enum {string} */
metaDatePrecision: "DAY" | "MONTH" | "SEASON" | "YEAR" | "RANGE" | "APPROX" | "UNKNOWN";
/** Format: date */
metaDateEnd?: string;
sender?: components["schemas"]["Person"]; sender?: components["schemas"]["Person"];
receivers: components["schemas"]["Person"][]; receivers: components["schemas"]["Person"][];
tags: components["schemas"]["Tag"][]; tags: components["schemas"]["Tag"][];

View File

@@ -16,6 +16,7 @@ const baseDoc: Document = {
title: 'Brief an Hans', title: 'Brief an Hans',
originalFilename: 'brief.pdf', originalFilename: 'brief.pdf',
status: 'UPLOADED', status: 'UPLOADED',
metaDatePrecision: 'UNKNOWN',
metadataComplete: true, metadataComplete: true,
scriptType: 'HANDWRITING_KURRENT', scriptType: 'HANDWRITING_KURRENT',
createdAt: '2025-01-01T12:00:00Z', createdAt: '2025-01-01T12:00:00Z',
@@ -127,7 +128,8 @@ describe('ReaderRecentDocs', () => {
firstName: 'Anna', firstName: 'Anna',
displayName: 'Anna Müller', displayName: 'Anna Müller',
personType: 'PERSON' as const, personType: 'PERSON' as const,
familyMember: false familyMember: false,
provisional: false
} }
}; };
render(ReaderRecentDocs, { documents: [docWithSender] }); render(ReaderRecentDocs, { documents: [docWithSender] });

View File

@@ -20,6 +20,7 @@ const makePerson = (id: string, name: string, overrides: Partial<Person> = {}):
displayName: name, displayName: name,
personType: 'PERSON', personType: 'PERSON',
familyMember: false, familyMember: false,
provisional: false,
...overrides ...overrides
}; };
}; };

View File

@@ -34,6 +34,7 @@ const AUGUSTE: Person = {
displayName: 'Auguste Raddatz', displayName: 'Auguste Raddatz',
personType: 'PERSON', personType: 'PERSON',
familyMember: false, familyMember: false,
provisional: false,
birthYear: 1882, birthYear: 1882,
deathYear: 1944 deathYear: 1944
}; };
@@ -45,6 +46,7 @@ const ANNA: Person = {
displayName: 'Anna Schmidt', displayName: 'Anna Schmidt',
personType: 'PERSON', personType: 'PERSON',
familyMember: false, familyMember: false,
provisional: false,
birthYear: 1860 birthYear: 1860
}; };