feat(schema): one migration + domain model for import/precision/identity (Phase 2, #671) #673

Merged
marcel merged 10 commits from feature/671-schema-foundation into docs/import-migration 2026-05-27 10:13:54 +02:00
25 changed files with 545 additions and 13 deletions

View File

@@ -0,0 +1,17 @@
package org.raddatz.familienarchiv.document;
/**
* Precision of a document's date. Verbatim mirror of the import normalizer's
* {@code Precision} enum (tools/import-normalizer/dates.py) — the canonical output is the
* contract, so there is no translation layer. Do not add, remove, or rename values without
* also changing the normalizer; a mismatch silently breaks import idempotency (see ADR-025).
*/
public enum DatePrecision {
DAY,
MONTH,
SEASON,
YEAR,
RANGE,
APPROX,
UNKNOWN
}

View File

@@ -91,6 +91,29 @@ public class Document {
@Column(name = "meta_date")
private LocalDate documentDate; // Wann wurde der Brief geschrieben?
// Precision of documentDate — drives honest rendering ("ca. 1943", "Frühjahr 1943").
// Verbatim mirror of the normalizer's Precision enum (see ADR-025).
@Enumerated(EnumType.STRING)
@Column(name = "meta_date_precision", nullable = false, length = 16)
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
@Builder.Default
private DatePrecision metaDatePrecision = DatePrecision.UNKNOWN;
// Range end — only set when metaDatePrecision is RANGE (open-ended ranges allowed → may be null).
@Column(name = "meta_date_end")
private LocalDate metaDateEnd;
// Original date cell, verbatim, preserved for provenance and "as written" display.
@Column(name = "meta_date_raw", columnDefinition = "TEXT")
private String metaDateRaw;
// Raw attribution preserved even when a person is linked via sender/receivers.
@Column(name = "sender_text", columnDefinition = "TEXT")
private String senderText;
@Column(name = "receiver_text", columnDefinition = "TEXT")
private String receiverText;
@Column(name = "meta_location")
private String location;

View File

@@ -12,6 +12,8 @@ public class DocumentBatchMetadataDTO {
private UUID senderId;
private List<UUID> receiverIds;
private LocalDate documentDate;
private DatePrecision metaDatePrecision;
private LocalDate metaDateEnd;
private String location;
private List<String> tagNames;
private Boolean metadataComplete;

View File

@@ -18,6 +18,9 @@ public record DocumentListItem(
String originalFilename,
String thumbnailUrl,
LocalDate documentDate,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
DatePrecision metaDatePrecision,
LocalDate metaDateEnd,
Person sender,
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
List<Person> receivers,

View File

@@ -758,6 +758,8 @@ public class DocumentService {
doc.getOriginalFilename(),
doc.getThumbnailUrl(),
doc.getDocumentDate(),
doc.getMetaDatePrecision(),
doc.getMetaDateEnd(),
doc.getSender(),
List.copyOf(doc.getReceivers()),
List.copyOf(doc.getTags()),

View File

@@ -11,6 +11,11 @@ import org.raddatz.familienarchiv.ocr.ScriptType;
public class DocumentUpdateDTO {
private String title;
private LocalDate documentDate;
private DatePrecision metaDatePrecision;
private LocalDate metaDateEnd;
private String metaDateRaw;
private String senderText;
private String receiverText;
private String location;
private String documentLocation;
private String archiveBox;

View File

@@ -57,6 +57,18 @@ public class Person {
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
private boolean familyMember = false;
// The normalizer person_id — join key and re-import idempotency key. Null for manually
// created persons; unique among non-null values (see ADR-025).
@Column(name = "source_ref")
private String sourceRef;
// A provisional person is one the importer inferred but could not confidently identify.
// Distinct from familyMember (a genealogical fact); set true only by the importer (Phase 3).
@Column(name = "provisional", nullable = false)
@Builder.Default
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
private boolean provisional = false;
// Entity-graph navigation for JPA JOIN queries (e.g. DocumentSpecifications.hasText).
// Uses entity relationship rather than cross-domain repository access, avoiding a
// separate DB roundtrip while respecting domain boundaries.

View File

@@ -41,7 +41,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
p.person_type AS personType,
p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
p.family_member AS familyMember,
p.family_member AS familyMember, p.provisional AS provisional,
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
FROM persons p
@@ -54,7 +54,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
p.person_type AS personType,
p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
p.family_member AS familyMember,
p.family_member AS familyMember, p.provisional AS provisional,
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
FROM persons p
@@ -63,7 +63,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
OR LOWER(CONCAT(p.last_name,' ',COALESCE(p.first_name,''))) LIKE LOWER(CONCAT('%',:query,'%'))
OR LOWER(p.alias) LIKE LOWER(CONCAT('%',:query,'%'))
OR LOWER(a.last_name) LIKE LOWER(CONCAT('%',:query,'%'))
GROUP BY p.id, p.title, p.first_name, p.last_name, p.person_type, p.alias, p.birth_year, p.death_year, p.notes, p.family_member
GROUP BY p.id, p.title, p.first_name, p.last_name, p.person_type, p.alias, p.birth_year, p.death_year, p.notes, p.family_member, p.provisional
ORDER BY p.last_name ASC, p.first_name ASC
""",
nativeQuery = true)
@@ -75,7 +75,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
p.person_type AS personType,
p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
p.family_member AS familyMember,
p.family_member AS familyMember, p.provisional AS provisional,
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
FROM persons p

View File

@@ -18,6 +18,7 @@ public interface PersonSummaryDTO {
Integer getDeathYear();
String getNotes();
boolean isFamilyMember();
boolean isProvisional();
long getDocumentCount();
default String getDisplayName() {

View File

@@ -30,4 +30,11 @@ public class Tag {
/** Color token name (e.g. "sage"), only set on root-level tags. Null means no color. */
private String color;
/**
* Import identity key, keyed on the canonical tag_path. Null for manually created tags;
* unique among non-null values. The importer (Phase 3) uses it for idempotent re-import.
*/
@Column(name = "source_ref")
private String sourceRef;
}

View File

@@ -0,0 +1,67 @@
-- Phase 2 of "Handling the Unknowns": the schema foundation.
-- Consolidates every new import/precision/attribution/identity column into ONE
-- migration with a single owner so downstream phases (importer, rendering, persons
-- directory) compile against a finished, collision-free schema. See ADR-025.
--
-- This file is forward-only and immutable once shipped (Flyway checksum model):
-- any fix goes in a later version, never an edit here.
-- ─── documents: date precision, range end, raw date, raw attribution ──────────
-- Range end is only set for RANGE precision (open-ended ranges allowed → end may be null).
ALTER TABLE documents ADD COLUMN meta_date_end date;
-- Original date cell, verbatim, for provenance and "as written" display (Phase 4).
ALTER TABLE documents ADD COLUMN meta_date_raw text;
-- Raw attribution preserved even when a person is linked.
ALTER TABLE documents ADD COLUMN sender_text text;
ALTER TABLE documents ADD COLUMN receiver_text text;
-- Bound user-influenced spreadsheet text at the DB layer (mirrors transcription_blocks
-- length cap in V18). Defense in depth against malformed/huge import cells.
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_raw_length CHECK (length(meta_date_raw) <= 10000);
ALTER TABLE documents ADD CONSTRAINT chk_sender_text_length CHECK (length(sender_text) <= 10000);
ALTER TABLE documents ADD CONSTRAINT chk_receiver_text_length CHECK (length(receiver_text) <= 10000);
-- Precision enum — added with a DB default of 'UNKNOWN', backfilled, then made NOT NULL.
-- The DEFAULT serves two purposes: (1) existing rows get 'UNKNOWN' immediately, and
-- (2) raw-SQL inserts that omit the column (test fixtures, ad-hoc data loads) get a sane,
-- CHECK-valid value instead of violating the NOT NULL constraint. JPA saves still set it
-- explicitly via the entity's @Builder.Default = DatePrecision.UNKNOWN.
ALTER TABLE documents ADD COLUMN meta_date_precision varchar(16) DEFAULT 'UNKNOWN';
UPDATE documents
SET meta_date_precision = CASE WHEN meta_date IS NOT NULL THEN 'DAY' ELSE 'UNKNOWN' END;
ALTER TABLE documents ALTER COLUMN meta_date_precision SET NOT NULL;
-- Fail-closed allowlist of the seven precision values (verbatim mirror of the
-- normalizer's Precision enum). The DB enforces validity independent of the Java enum.
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_precision
CHECK (meta_date_precision IN ('DAY', 'MONTH', 'SEASON', 'YEAR', 'RANGE', 'APPROX', 'UNKNOWN'));
-- A non-null range end is permitted only when precision = RANGE. A RANGE row MAY have a
-- null end (open-ended range), so the rule is one-directional, not biconditional.
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_end_only_for_range
CHECK (meta_date_end IS NULL OR meta_date_precision = 'RANGE');
-- For ranges with both endpoints, the end must not precede the start.
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_end_after_start
CHECK (meta_date_end IS NULL OR meta_date IS NULL OR meta_date_end >= meta_date);
-- ─── persons: source_ref (import identity) + provisional flag ─────────────────
-- The normalizer person_id: join key for documents → persons and idempotency key for
-- re-import. Nullable (manually created persons never have one); unique among non-nulls.
ALTER TABLE persons ADD COLUMN source_ref varchar(255);
CREATE UNIQUE INDEX idx_persons_source_ref ON persons (source_ref);
-- A provisional person is one the importer inferred but could not confidently identify.
-- Stays false until Phase 3 (importer) sets it; no code path writes true in this phase.
ALTER TABLE persons ADD COLUMN provisional boolean NOT NULL DEFAULT false;
-- ─── tag: source_ref (import identity, keyed on canonical tag_path) ───────────
ALTER TABLE tag ADD COLUMN source_ref varchar(255);
CREATE UNIQUE INDEX idx_tag_source_ref ON tag (source_ref);

View File

@@ -479,6 +479,191 @@ class MigrationIntegrationTest {
assertThat(count).isEqualTo(1);
}
// ─── V69: import/precision/attribution/identity schema foundation ────────
@Test
void v69_metaDatePrecisionColumn_isNotNull() {
Integer count = jdbc.queryForObject(
"""
SELECT COUNT(*) FROM information_schema.columns
WHERE table_schema = 'public'
AND table_name = 'documents'
AND column_name = 'meta_date_precision'
AND is_nullable = 'NO'
""",
Integer.class);
assertThat(count).isEqualTo(1);
}
@Test
void v69_backfillSql_setsDatedRowsToDayPrecision() {
// Re-run the migration's backfill UPDATE on a freshly dated row to prove the rule.
UUID docId = createDocumentWithDate("1943-05-12");
jdbc.update(V69_BACKFILL_PRECISION_SQL);
String precision = jdbc.queryForObject(
"SELECT meta_date_precision FROM documents WHERE id = ?", String.class, docId);
assertThat(precision).isEqualTo("DAY");
}
@Test
void v69_backfillSql_setsUndatedRowsToUnknownPrecision() {
UUID docId = createDocument(); // no meta_date
jdbc.update(V69_BACKFILL_PRECISION_SQL);
String precision = jdbc.queryForObject(
"SELECT meta_date_precision FROM documents WHERE id = ?", String.class, docId);
assertThat(precision).isEqualTo("UNKNOWN");
}
// Mirrors the backfill UPDATE shipped in V69; idempotent for verification.
private static final String V69_BACKFILL_PRECISION_SQL = """
UPDATE documents
SET meta_date_precision = CASE WHEN meta_date IS NOT NULL THEN 'DAY' ELSE 'UNKNOWN' END
""";
@Test
void v69_precisionCheck_rejectsValueOutsideEnum() {
UUID docId = createDocument();
assertThatThrownBy(() ->
jdbc.update("UPDATE documents SET meta_date_precision = 'BOGUS' WHERE id = ?", docId)
).isInstanceOf(DataIntegrityViolationException.class);
}
@Test
void v69_metaDateEndCheck_rejectsNonNullEndWhenPrecisionNotRange() {
UUID docId = createDocumentWithDate("1943-05-12"); // precision DAY
assertThatThrownBy(() ->
jdbc.update("UPDATE documents SET meta_date_end = '1943-06-01' WHERE id = ?", docId)
).isInstanceOf(DataIntegrityViolationException.class);
}
@Test
void v69_metaDateEndCheck_allowsNonNullEndWhenPrecisionRange() {
UUID docId = createDocumentWithDate("1943-05-12");
int rows = jdbc.update(
"UPDATE documents SET meta_date_precision = 'RANGE', meta_date_end = '1943-06-01' WHERE id = ?",
docId);
assertThat(rows).isEqualTo(1);
}
@Test
void v69_metaDateEndCheck_allowsRangeWithNullEnd() {
// Loose semantics: the normalizer may emit an open-ended RANGE (start only).
UUID docId = createDocumentWithDate("1943-05-12");
int rows = jdbc.update(
"UPDATE documents SET meta_date_precision = 'RANGE' WHERE id = ?", docId);
assertThat(rows).isEqualTo(1);
}
@Test
void v69_metaDateEndCheck_allowsRangeWithBothEndpointsNull() {
// Fully-open RANGE: neither start (meta_date) nor end (meta_date_end) is set.
// Both CHECKs hold (end IS NULL passes chk_meta_date_end_only_for_range; both-null
// passes chk_meta_date_end_after_start), so the row survives. This locks the actual
// DB behavior so a future tightening to a biconditional rule is a deliberate change.
UUID docId = createDocument(); // null meta_date
int rows = jdbc.update(
"UPDATE documents SET meta_date_precision = 'RANGE' WHERE id = ?", docId);
assertThat(rows).isEqualTo(1);
Object metaDate = jdbc.queryForObject("SELECT meta_date FROM documents WHERE id = ?", Object.class, docId);
Object metaDateEnd = jdbc.queryForObject(
"SELECT meta_date_end FROM documents WHERE id = ?", Object.class, docId);
assertThat(metaDate).isNull();
assertThat(metaDateEnd).isNull();
}
@Test
void v69_rangeOrderCheck_rejectsEndBeforeStart() {
UUID docId = createDocumentWithDate("1943-05-12");
assertThatThrownBy(() ->
jdbc.update(
"UPDATE documents SET meta_date_precision = 'RANGE', meta_date_end = '1943-01-01' WHERE id = ?",
docId)
).isInstanceOf(DataIntegrityViolationException.class);
}
@Test
void v69_metaDateRawCheck_rejectsOverlongText() {
UUID docId = createDocument();
String tooLong = "x".repeat(10001);
assertThatThrownBy(() ->
jdbc.update("UPDATE documents SET meta_date_raw = ? WHERE id = ?", tooLong, docId)
).isInstanceOf(DataIntegrityViolationException.class);
}
@Test
void v69_senderTextAndReceiverText_storeRawAttribution() {
UUID docId = createDocument();
int rows = jdbc.update(
"UPDATE documents SET sender_text = 'Oma Anna', receiver_text = 'Tante Grete' WHERE id = ?",
docId);
assertThat(rows).isEqualTo(1);
}
@Test
@Transactional(propagation = Propagation.NOT_SUPPORTED)
void v69_personsSourceRef_uniqueIndexRejectsDuplicate() {
jdbc.update(
"INSERT INTO persons (id, last_name, source_ref) VALUES (gen_random_uuid(), 'A', 'person:dup')");
try {
assertThatThrownBy(() ->
jdbc.update(
"INSERT INTO persons (id, last_name, source_ref) VALUES (gen_random_uuid(), 'B', 'person:dup')")
).isInstanceOf(DataIntegrityViolationException.class);
} finally {
jdbc.update("DELETE FROM persons WHERE source_ref = 'person:dup'");
}
}
@Test
@Transactional(propagation = Propagation.NOT_SUPPORTED)
void v69_personsSourceRef_allowsMultipleNulls() {
UUID a = createPerson("Null", "RefA");
UUID b = createPerson("Null", "RefB");
try {
String refA = jdbc.queryForObject("SELECT source_ref FROM persons WHERE id = ?", String.class, a);
String refB = jdbc.queryForObject("SELECT source_ref FROM persons WHERE id = ?", String.class, b);
assertThat(refA).isNull();
assertThat(refB).isNull();
} finally {
jdbc.update("DELETE FROM persons WHERE id IN (?, ?)", a, b);
}
}
@Test
void v69_personsProvisional_defaultsToFalse() {
UUID id = createPerson("Provisional", "Default");
Boolean provisional = jdbc.queryForObject(
"SELECT provisional FROM persons WHERE id = ?", Boolean.class, id);
assertThat(provisional).isFalse();
}
@Test
@Transactional(propagation = Propagation.NOT_SUPPORTED)
void v69_tagSourceRef_uniqueIndexRejectsDuplicate() {
jdbc.update("INSERT INTO tag (id, name, source_ref) VALUES (gen_random_uuid(), 'TagDupA', 'tag:dup')");
try {
assertThatThrownBy(() ->
jdbc.update("INSERT INTO tag (id, name, source_ref) VALUES (gen_random_uuid(), 'TagDupB', 'tag:dup')")
).isInstanceOf(DataIntegrityViolationException.class);
} finally {
jdbc.update("DELETE FROM tag WHERE source_ref = 'tag:dup'");
}
}
// ─── helpers ─────────────────────────────────────────────────────────────
private UUID createPerson(String firstName, String lastName) {
@@ -504,6 +689,12 @@ class MigrationIntegrationTest {
return doc.getId();
}
private UUID createDocumentWithDate(String isoDate) {
UUID id = createDocument();
jdbc.update("UPDATE documents SET meta_date = ?::date WHERE id = ?", isoDate, id);
return id;
}
private UUID insertAnnotation(UUID docId) {
UUID id = UUID.randomUUID();
jdbc.update("""

View File

@@ -133,7 +133,8 @@ class DocumentControllerTest {
"Er schrieb einen langen Brief", List.of(), false, List.of(), List.of(), List.of(), null, List.of());
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
.thenReturn(DocumentSearchResult.of(List.of(new DocumentListItem(
docId, "Brief an Anna", "brief.pdf", null, null, null,
docId, "Brief an Anna", "brief.pdf", null, null,
DatePrecision.UNKNOWN, null, null,
List.of(), List.of(), null, null, null, null,
0, List.of(), matchData))));
@@ -151,7 +152,8 @@ class DocumentControllerTest {
var matchData = new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of(), null, List.of());
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
.thenReturn(DocumentSearchResult.of(List.of(new DocumentListItem(
docId, "Brief an Anna", "brief.pdf", null, null, null,
docId, "Brief an Anna", "brief.pdf", null, null,
DatePrecision.UNKNOWN, null, null,
List.of(), List.of(), null, null, null, null,
0, List.of(), matchData))));

View File

@@ -81,6 +81,28 @@ class DocumentListItemIntegrationTest {
assertThat(item.title()).isEqualTo("Kurrent Brief");
}
@Test
void search_listItem_carriesMetaDatePrecisionAndEnd() {
documentRepository.save(Document.builder()
.title("Range Brief")
.originalFilename("range.pdf")
.status(DocumentStatus.UPLOADED)
.documentDate(java.time.LocalDate.of(1943, 1, 1))
.metaDatePrecision(DatePrecision.RANGE)
.metaDateEnd(java.time.LocalDate.of(1943, 12, 31))
.build());
DocumentSearchResult result = documentService.searchDocuments(
null, null, null, null, null, null, null, null,
DocumentSort.DATE, "DESC", null,
PageRequest.of(0, 50));
DocumentListItem item = result.items().stream()
.filter(i -> i.title().equals("Range Brief")).findFirst().orElseThrow();
assertThat(item.metaDatePrecision()).isEqualTo(DatePrecision.RANGE);
assertThat(item.metaDateEnd()).isEqualTo(java.time.LocalDate.of(1943, 12, 31));
}
@Test
void detail_stillReturnsTrainingLabels() {
Document saved = documentRepository.save(Document.builder()

View File

@@ -14,7 +14,8 @@ class DocumentSearchResultTest {
private DocumentListItem item(UUID docId) {
return new DocumentListItem(
docId, "Test", "test.pdf", null, null, null,
docId, "Test", "test.pdf", null, null,
DatePrecision.UNKNOWN, null, null,
List.of(), List.of(), null, null, null, null,
0, List.of(), SearchMatchData.empty());
}
@@ -64,7 +65,8 @@ class DocumentSearchResultTest {
UUID id = UUID.randomUUID();
ActivityActorDTO actor = new ActivityActorDTO("AB", "#f00", "Anna Braun");
DocumentListItem item = new DocumentListItem(
id, "T", "t.pdf", null, null, null,
id, "T", "t.pdf", null, null,
DatePrecision.UNKNOWN, null, null,
List.of(), List.of(), null, null, null, null,
75, List.of(actor), SearchMatchData.empty());

View File

@@ -117,6 +117,7 @@ class PersonControllerTest {
public Integer getDeathYear() { return null; }
public String getNotes() { return null; }
public boolean isFamilyMember() { return false; }
public boolean isProvisional() { return false; }
public long getDocumentCount() { return 0; }
};
}

View File

@@ -463,4 +463,46 @@ class PersonRepositoryTest {
assertThat(result).hasSize(1);
assertThat(result.get(0).getLastName()).isEqualTo("Gesellschafter des Verlages");
}
// ─── #671: provisional must be SELECTed in all three native projections ───
// Adding isProvisional() to the interface compiles even if a native query forgets
// to SELECT p.provisional — it then silently returns false. These tests are the only
// guard against that trap, so they must run against real Postgres.
@Test
void findAllWithDocumentCount_projectsProvisionalTrue() {
personRepository.save(Person.builder()
.firstName("Inferred").lastName("Person").provisional(true).build());
List<PersonSummaryDTO> result = personRepository.findAllWithDocumentCount();
assertThat(result).anyMatch(PersonSummaryDTO::isProvisional);
}
@Test
void searchWithDocumentCount_projectsProvisionalTrue() {
personRepository.save(Person.builder()
.firstName("Provisorisch").lastName("Müller").provisional(true).build());
List<PersonSummaryDTO> result = personRepository.searchWithDocumentCount("Provisorisch");
assertThat(result).hasSize(1);
assertThat(result.get(0).isProvisional()).isTrue();
}
@Test
void findTopByDocumentCount_projectsProvisionalTrue() {
Person provisional = personRepository.save(Person.builder()
.firstName("Top").lastName("Provisional").provisional(true).build());
documentRepository.save(Document.builder()
.title("Brief").originalFilename("b.pdf")
.status(DocumentStatus.UPLOADED)
.sender(provisional).build());
List<PersonSummaryDTO> result = personRepository.findTopByDocumentCount(10);
PersonSummaryDTO summary = result.stream()
.filter(p -> p.getId().equals(provisional.getId())).findFirst().orElseThrow();
assertThat(summary.isProvisional()).isTrue();
}
}

View File

@@ -25,6 +25,11 @@ _Not to be confused with [AppUser](#appuser-appuser)_ — `Person` is a historic
**UserGroup** (`UserGroup`) — a named permission bundle assigned to one or more `AppUser`s. A user's effective permissions are the union of all permissions across all groups they belong to.
**source_ref** (`Person.sourceRef`, `Tag.sourceRef`) — the import normalizer's stable identity for a `Person` (its `person_id`) or `Tag` (its canonical `tag_path`). It is the join key linking normalized records to documents and the idempotency key for re-import; null for manually created records and unique among non-null values.
**provisional person** (`Person.provisional`) — a `Person` the importer inferred from raw attribution text but could not confidently match to a known individual. The flag lets the persons directory surface uncertainty honestly rather than fabricate a confident identity; it defaults to `false` and is set `true` only by the importer.
_Not to be confused with `family_member`_ — `provisional` expresses import confidence, while `family_member` is a genealogical fact about whether the person belongs to the family tree.
---
## Document-Related Terms
@@ -36,6 +41,10 @@ _See also [TranscriptionBlock](#transcriptionblock-transcriptionblock)._
**Document** (`Document`) — a single archival item (letter, postcard, photograph) with a file stored in MinIO/S3 and associated metadata (sender, receivers, date, tags, transcription blocks).
**date precision** (`Document.metaDatePrecision`, enum `DatePrecision`) — how exactly a document's date is known, one of `DAY, MONTH, SEASON, YEAR, RANGE, APPROX, UNKNOWN`. A verbatim mirror of the import normalizer's `Precision` enum so honest dates can be rendered (`APPROX` → "ca.", `RANGE` uses `meta_date_end`) instead of fabricating a false `DAY`-level date. `UNKNOWN` is the explicit value for undated documents.
**raw attribution** (`Document.senderText`, `Document.receiverText`, `Document.metaDateRaw`) — the original spreadsheet cell text for a document's sender, receiver, and date, preserved verbatim even after a `Person` or normalized date is linked. It keeps provenance intact and enables an "as written in the original" view.
**DocumentVersion** (`DocumentVersion`) — an append-only snapshot of a `Document`'s metadata at a point in time. Append-only by convention; no consumer-facing create or update endpoint exists. The entity uses Lombok `@Data` (which generates setters), so immutability is enforced by application convention, not at the Java level.
**Tag** (`Tag`) — a hierarchical category that can be applied to `Document`s. Tags are self-referencing via a `parent_id` foreign key, forming a tree structure.

View File

@@ -0,0 +1,83 @@
# ADR-025 — Canonical Import Output as Contract & Single-Migration Schema Foundation
**Date:** 2026-05-27
**Status:** Accepted
**Issue:** #671
**Milestone:** Handling the Unknowns — honest uncertainty in dates & people
---
## Context
The "Handling the Unknowns" milestone introduces honest uncertainty into the archive:
documents whose dates are known only approximately or as a range, and people the importer
infers from raw attribution text but cannot confidently identify. Three sibling issues —
date precision (#666), name triage (#665), and the importer (#669) — each independently
planned a Flyway `V69` migration that altered `persons`. Three `V69`s is a boot failure
(Flyway versions must be unique), and `persons.provisional` was at risk of being defined
twice.
Two durable decisions had to be made before any application code in Phases 36 could
compile against the new schema.
---
## Decision
### 1. All import/precision/attribution/identity schema lives in ONE migration with a single owner
`V69__import_precision_attribution_identity_schema.sql` adds every new column for this
milestone in a single, atomic, forward-only migration:
- `documents`: `meta_date_precision` (backfilled `DAY` where dated / `UNKNOWN` where not,
then `NOT NULL`), `meta_date_end`, `meta_date_raw`, `sender_text`, `receiver_text`.
- `persons`: `source_ref` (unique index, nullable), `provisional` (`NOT NULL DEFAULT false`).
- `tag`: `source_ref` (unique index, nullable).
Integrity is pushed to the database as fail-closed `CHECK` constraints (the precedent is
`V22`'s `person_type` allowlist):
- `meta_date_precision` must be one of the seven enum values.
- `meta_date_end` may be non-null **only** when precision = `RANGE` (one-directional, not
biconditional — see Consequences).
- `meta_date_end >= meta_date` for ranges with both endpoints (a `CHECK`, not a trigger).
- `meta_date_raw`, `sender_text`, `receiver_text` are length-capped at 10 000 (mirrors the
`transcription_blocks` cap in `V18`).
No sibling issue adds another migration that alters `persons` or `documents` in this
milestone.
### 2. The backend `DatePrecision` enum is a verbatim mirror of the normalizer's `Precision`; the canonical output is the contract
The importer reads the Python normalizer's canonical output
(`tools/import-normalizer/`). The backend `DatePrecision` enum
(`DAY, MONTH, SEASON, YEAR, RANGE, APPROX, UNKNOWN`) is a verbatim copy of the normalizer's
`Precision(StrEnum)` (`dates.py`). There is **no translation layer**: the normalizer's
output strings are persisted as-is. The same applies to `source_ref`, which carries the
normalizer's `person_id` / canonical `tag_path` unchanged as the re-import idempotency key.
---
## Consequences
- **RANGE is one-directional, not biconditional.** A `RANGE` row may have a null
`meta_date_end` (an open-ended range with only a start), because the normalizer can emit
start-only ranges. A biconditional `RANGE ⟺ end IS NOT NULL` rule would reject valid
normalizer output, so it was rejected. Phase 4 rendering must handle a `RANGE` with no end
gracefully.
- **`provisional` stays `false` throughout this phase.** The column and flag exist, but no
code path sets it `true`; the importer (Phase 3) is the only writer. This is intentional,
not a half-built feature.
- **A future dev must not "improve" the enum.** Renaming or dropping a `DatePrecision` value
without changing the normalizer silently breaks import idempotency and date rendering. The
enum's Javadoc states this; the DB `CHECK` enforces validity independent of the Java enum.
- **`source_ref` is unique + nullable.** Manually created persons/tags have `source_ref =
NULL`; Postgres allows multiple NULLs under a plain unique index, so no backfill is needed.
- **Forward-only.** The migration is immutable once shipped (Flyway checksum model); any fix
goes in a later version. There is no down-migration — rollback means restoring from the
nightly `pg_dump`, the standard procedure.
- **`PersonSummaryDTO` coupling.** `provisional` was added to the `PersonSummaryDTO` native
interface projection; because the projection is backed by native SQL, the column had to be
added to all three native `SELECT`s (`findAllWithDocumentCount`, `searchWithDocumentCount`,
`findTopByDocumentCount`) or it would silently return `false`. Guarded by integration tests
against real Postgres.

View File

@@ -1,6 +1,6 @@
@startuml db-orm
' Schema source: Flyway V1V60 (excl. V37, V43 — intentionally removed)
' Schema as of: V60 (2026-05-06)
' Schema source: Flyway V1V69 (excl. V37, V43 — intentionally removed)
' Schema as of: V69 (2026-05-27)
' ⚠ This is a versioned snapshot. Update when the schema changes significantly.
hide circle
@@ -88,6 +88,11 @@ package "Documents" {
summary : TEXT
transcription : TEXT
meta_date : DATE
meta_date_precision : VARCHAR(16) NOT NULL
meta_date_end : DATE
meta_date_raw : TEXT
sender_text : TEXT
receiver_text : TEXT
meta_location : VARCHAR(255)
meta_document_location : VARCHAR(255)
archive_box : VARCHAR(255)
@@ -182,6 +187,8 @@ package "Persons" {
birth_year : INTEGER
death_year : INTEGER
family_member : BOOLEAN NOT NULL
source_ref : VARCHAR(255) UNIQUE
provisional : BOOLEAN NOT NULL
}
entity person_name_aliases {
@@ -217,6 +224,7 @@ package "Tags" {
name : VARCHAR(255) NOT NULL UNIQUE
parent_id : UUID <<FK>>
color : VARCHAR(20)
source_ref : VARCHAR(255) UNIQUE
}
}

View File

@@ -1,7 +1,9 @@
@startuml db-relationships
' Schema source: Flyway V1V60 (excl. V37, V43 — intentionally removed)
' Schema as of: V60 (2026-05-06)
' Schema source: Flyway V1V69 (excl. V37, V43 — intentionally removed)
' Schema as of: V69 (2026-05-27)
' ⚠ This is a versioned snapshot. Update when the schema changes significantly.
' Note: V69 adds columns only (persons.source_ref, tag.source_ref, document
' precision/attribution fields); no new FK relationships, so this diagram is unchanged.
hide circle
skinparam linetype ortho

View File

@@ -1636,6 +1636,7 @@ export interface components {
/** Format: uuid */
parentId?: string;
color?: string;
sourceRef?: string;
};
PersonUpdateDTO: {
/** @enum {string} */
@@ -1665,12 +1666,21 @@ export interface components {
/** Format: int32 */
deathYear?: number;
familyMember: boolean;
sourceRef?: string;
provisional: boolean;
readonly displayName: string;
};
DocumentUpdateDTO: {
title?: string;
/** Format: date */
documentDate?: string;
/** @enum {string} */
metaDatePrecision?: "DAY" | "MONTH" | "SEASON" | "YEAR" | "RANGE" | "APPROX" | "UNKNOWN";
/** Format: date */
metaDateEnd?: string;
metaDateRaw?: string;
senderText?: string;
receiverText?: string;
location?: string;
documentLocation?: string;
archiveBox?: string;
@@ -1704,6 +1714,13 @@ export interface components {
status: "PLACEHOLDER" | "UPLOADED" | "TRANSCRIBED" | "REVIEWED" | "ARCHIVED";
/** Format: date */
documentDate?: string;
/** @enum {string} */
metaDatePrecision: "DAY" | "MONTH" | "SEASON" | "YEAR" | "RANGE" | "APPROX" | "UNKNOWN";
/** Format: date */
metaDateEnd?: string;
metaDateRaw?: string;
senderText?: string;
receiverText?: string;
location?: string;
documentLocation?: string;
archiveBox?: string;
@@ -2024,6 +2041,10 @@ export interface components {
receiverIds?: string[];
/** Format: date */
documentDate?: string;
/** @enum {string} */
metaDatePrecision?: "DAY" | "MONTH" | "SEASON" | "YEAR" | "RANGE" | "APPROX" | "UNKNOWN";
/** Format: date */
metaDateEnd?: string;
location?: string;
tagNames?: string[];
metadataComplete?: boolean;
@@ -2221,6 +2242,7 @@ export interface components {
notes?: string;
personType?: string;
familyMember?: boolean;
provisional?: boolean;
};
InferredRelationshipWithPersonDTO: {
person: components["schemas"]["PersonNodeDTO"];
@@ -2396,6 +2418,10 @@ export interface components {
thumbnailUrl?: string;
/** Format: date */
documentDate?: string;
/** @enum {string} */
metaDatePrecision: "DAY" | "MONTH" | "SEASON" | "YEAR" | "RANGE" | "APPROX" | "UNKNOWN";
/** Format: date */
metaDateEnd?: string;
sender?: components["schemas"]["Person"];
receivers: components["schemas"]["Person"][];
tags: components["schemas"]["Tag"][];

View File

@@ -16,6 +16,7 @@ const baseDoc: Document = {
title: 'Brief an Hans',
originalFilename: 'brief.pdf',
status: 'UPLOADED',
metaDatePrecision: 'UNKNOWN',
metadataComplete: true,
scriptType: 'HANDWRITING_KURRENT',
createdAt: '2025-01-01T12:00:00Z',
@@ -127,7 +128,8 @@ describe('ReaderRecentDocs', () => {
firstName: 'Anna',
displayName: 'Anna Müller',
personType: 'PERSON' as const,
familyMember: false
familyMember: false,
provisional: false
}
};
render(ReaderRecentDocs, { documents: [docWithSender] });

View File

@@ -20,6 +20,7 @@ const makePerson = (id: string, name: string, overrides: Partial<Person> = {}):
displayName: name,
personType: 'PERSON',
familyMember: false,
provisional: false,
...overrides
};
};

View File

@@ -34,6 +34,7 @@ const AUGUSTE: Person = {
displayName: 'Auguste Raddatz',
personType: 'PERSON',
familyMember: false,
provisional: false,
birthYear: 1882,
deathYear: 1944
};
@@ -45,6 +46,7 @@ const ANNA: Person = {
displayName: 'Anna Schmidt',
personType: 'PERSON',
familyMember: false,
provisional: false,
birthYear: 1860
};