feat(schema): one migration + domain model for import/precision/identity (Phase 2, #671) #673
@@ -0,0 +1,17 @@
|
||||
package org.raddatz.familienarchiv.document;
|
||||
|
||||
/**
|
||||
* Precision of a document's date. Verbatim mirror of the import normalizer's
|
||||
* {@code Precision} enum (tools/import-normalizer/dates.py) — the canonical output is the
|
||||
* contract, so there is no translation layer. Do not add, remove, or rename values without
|
||||
* also changing the normalizer; a mismatch silently breaks import idempotency (see ADR-025).
|
||||
*/
|
||||
public enum DatePrecision {
|
||||
DAY,
|
||||
MONTH,
|
||||
SEASON,
|
||||
YEAR,
|
||||
RANGE,
|
||||
APPROX,
|
||||
UNKNOWN
|
||||
}
|
||||
@@ -91,6 +91,29 @@ public class Document {
|
||||
@Column(name = "meta_date")
|
||||
private LocalDate documentDate; // Wann wurde der Brief geschrieben?
|
||||
|
||||
// Precision of documentDate — drives honest rendering ("ca. 1943", "Frühjahr 1943").
|
||||
// Verbatim mirror of the normalizer's Precision enum (see ADR-025).
|
||||
@Enumerated(EnumType.STRING)
|
||||
@Column(name = "meta_date_precision", nullable = false, length = 16)
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
@Builder.Default
|
||||
private DatePrecision metaDatePrecision = DatePrecision.UNKNOWN;
|
||||
|
||||
// Range end — only set when metaDatePrecision is RANGE (open-ended ranges allowed → may be null).
|
||||
@Column(name = "meta_date_end")
|
||||
private LocalDate metaDateEnd;
|
||||
|
||||
// Original date cell, verbatim, preserved for provenance and "as written" display.
|
||||
@Column(name = "meta_date_raw", columnDefinition = "TEXT")
|
||||
private String metaDateRaw;
|
||||
|
||||
// Raw attribution preserved even when a person is linked via sender/receivers.
|
||||
@Column(name = "sender_text", columnDefinition = "TEXT")
|
||||
private String senderText;
|
||||
|
||||
@Column(name = "receiver_text", columnDefinition = "TEXT")
|
||||
private String receiverText;
|
||||
|
||||
@Column(name = "meta_location")
|
||||
private String location;
|
||||
|
||||
|
||||
@@ -12,6 +12,8 @@ public class DocumentBatchMetadataDTO {
|
||||
private UUID senderId;
|
||||
private List<UUID> receiverIds;
|
||||
private LocalDate documentDate;
|
||||
private DatePrecision metaDatePrecision;
|
||||
private LocalDate metaDateEnd;
|
||||
private String location;
|
||||
private List<String> tagNames;
|
||||
private Boolean metadataComplete;
|
||||
|
||||
@@ -18,6 +18,9 @@ public record DocumentListItem(
|
||||
String originalFilename,
|
||||
String thumbnailUrl,
|
||||
LocalDate documentDate,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
DatePrecision metaDatePrecision,
|
||||
LocalDate metaDateEnd,
|
||||
Person sender,
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<Person> receivers,
|
||||
|
||||
@@ -758,6 +758,8 @@ public class DocumentService {
|
||||
doc.getOriginalFilename(),
|
||||
doc.getThumbnailUrl(),
|
||||
doc.getDocumentDate(),
|
||||
doc.getMetaDatePrecision(),
|
||||
doc.getMetaDateEnd(),
|
||||
doc.getSender(),
|
||||
List.copyOf(doc.getReceivers()),
|
||||
List.copyOf(doc.getTags()),
|
||||
|
||||
@@ -11,6 +11,11 @@ import org.raddatz.familienarchiv.ocr.ScriptType;
|
||||
public class DocumentUpdateDTO {
|
||||
private String title;
|
||||
private LocalDate documentDate;
|
||||
private DatePrecision metaDatePrecision;
|
||||
private LocalDate metaDateEnd;
|
||||
private String metaDateRaw;
|
||||
private String senderText;
|
||||
private String receiverText;
|
||||
private String location;
|
||||
private String documentLocation;
|
||||
private String archiveBox;
|
||||
|
||||
@@ -57,6 +57,18 @@ public class Person {
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
private boolean familyMember = false;
|
||||
|
||||
// The normalizer person_id — join key and re-import idempotency key. Null for manually
|
||||
// created persons; unique among non-null values (see ADR-025).
|
||||
@Column(name = "source_ref")
|
||||
private String sourceRef;
|
||||
|
||||
// A provisional person is one the importer inferred but could not confidently identify.
|
||||
// Distinct from familyMember (a genealogical fact); set true only by the importer (Phase 3).
|
||||
@Column(name = "provisional", nullable = false)
|
||||
@Builder.Default
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
private boolean provisional = false;
|
||||
|
||||
// Entity-graph navigation for JPA JOIN queries (e.g. DocumentSpecifications.hasText).
|
||||
// Uses entity relationship rather than cross-domain repository access, avoiding a
|
||||
// separate DB roundtrip while respecting domain boundaries.
|
||||
|
||||
@@ -41,7 +41,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
|
||||
p.person_type AS personType,
|
||||
p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
|
||||
p.family_member AS familyMember,
|
||||
p.family_member AS familyMember, p.provisional AS provisional,
|
||||
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
|
||||
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
|
||||
FROM persons p
|
||||
@@ -54,7 +54,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
|
||||
p.person_type AS personType,
|
||||
p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
|
||||
p.family_member AS familyMember,
|
||||
p.family_member AS familyMember, p.provisional AS provisional,
|
||||
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
|
||||
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
|
||||
FROM persons p
|
||||
@@ -63,7 +63,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
OR LOWER(CONCAT(p.last_name,' ',COALESCE(p.first_name,''))) LIKE LOWER(CONCAT('%',:query,'%'))
|
||||
OR LOWER(p.alias) LIKE LOWER(CONCAT('%',:query,'%'))
|
||||
OR LOWER(a.last_name) LIKE LOWER(CONCAT('%',:query,'%'))
|
||||
GROUP BY p.id, p.title, p.first_name, p.last_name, p.person_type, p.alias, p.birth_year, p.death_year, p.notes, p.family_member
|
||||
GROUP BY p.id, p.title, p.first_name, p.last_name, p.person_type, p.alias, p.birth_year, p.death_year, p.notes, p.family_member, p.provisional
|
||||
ORDER BY p.last_name ASC, p.first_name ASC
|
||||
""",
|
||||
nativeQuery = true)
|
||||
@@ -75,7 +75,7 @@ public interface PersonRepository extends JpaRepository<Person, UUID> {
|
||||
SELECT p.id, p.title, p.first_name AS firstName, p.last_name AS lastName,
|
||||
p.person_type AS personType,
|
||||
p.alias, p.birth_year AS birthYear, p.death_year AS deathYear, p.notes,
|
||||
p.family_member AS familyMember,
|
||||
p.family_member AS familyMember, p.provisional AS provisional,
|
||||
(SELECT COUNT(*) FROM documents d WHERE d.sender_id = p.id)
|
||||
+ (SELECT COUNT(*) FROM document_receivers dr WHERE dr.person_id = p.id) AS documentCount
|
||||
FROM persons p
|
||||
|
||||
@@ -18,6 +18,7 @@ public interface PersonSummaryDTO {
|
||||
Integer getDeathYear();
|
||||
String getNotes();
|
||||
boolean isFamilyMember();
|
||||
boolean isProvisional();
|
||||
long getDocumentCount();
|
||||
|
||||
default String getDisplayName() {
|
||||
|
||||
@@ -30,4 +30,11 @@ public class Tag {
|
||||
|
||||
/** Color token name (e.g. "sage"), only set on root-level tags. Null means no color. */
|
||||
private String color;
|
||||
|
||||
/**
|
||||
* Import identity key, keyed on the canonical tag_path. Null for manually created tags;
|
||||
* unique among non-null values. The importer (Phase 3) uses it for idempotent re-import.
|
||||
*/
|
||||
@Column(name = "source_ref")
|
||||
private String sourceRef;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
-- Phase 2 of "Handling the Unknowns": the schema foundation.
|
||||
-- Consolidates every new import/precision/attribution/identity column into ONE
|
||||
-- migration with a single owner so downstream phases (importer, rendering, persons
|
||||
-- directory) compile against a finished, collision-free schema. See ADR-025.
|
||||
--
|
||||
-- This file is forward-only and immutable once shipped (Flyway checksum model):
|
||||
-- any fix goes in a later version, never an edit here.
|
||||
|
||||
-- ─── documents: date precision, range end, raw date, raw attribution ──────────
|
||||
|
||||
-- Range end is only set for RANGE precision (open-ended ranges allowed → end may be null).
|
||||
ALTER TABLE documents ADD COLUMN meta_date_end date;
|
||||
|
||||
-- Original date cell, verbatim, for provenance and "as written" display (Phase 4).
|
||||
ALTER TABLE documents ADD COLUMN meta_date_raw text;
|
||||
|
||||
-- Raw attribution preserved even when a person is linked.
|
||||
ALTER TABLE documents ADD COLUMN sender_text text;
|
||||
ALTER TABLE documents ADD COLUMN receiver_text text;
|
||||
|
||||
-- Bound user-influenced spreadsheet text at the DB layer (mirrors transcription_blocks
|
||||
-- length cap in V18). Defense in depth against malformed/huge import cells.
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_raw_length CHECK (length(meta_date_raw) <= 10000);
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_sender_text_length CHECK (length(sender_text) <= 10000);
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_receiver_text_length CHECK (length(receiver_text) <= 10000);
|
||||
|
||||
-- Precision enum — added with a DB default of 'UNKNOWN', backfilled, then made NOT NULL.
|
||||
-- The DEFAULT serves two purposes: (1) existing rows get 'UNKNOWN' immediately, and
|
||||
-- (2) raw-SQL inserts that omit the column (test fixtures, ad-hoc data loads) get a sane,
|
||||
-- CHECK-valid value instead of violating the NOT NULL constraint. JPA saves still set it
|
||||
-- explicitly via the entity's @Builder.Default = DatePrecision.UNKNOWN.
|
||||
ALTER TABLE documents ADD COLUMN meta_date_precision varchar(16) DEFAULT 'UNKNOWN';
|
||||
|
||||
UPDATE documents
|
||||
SET meta_date_precision = CASE WHEN meta_date IS NOT NULL THEN 'DAY' ELSE 'UNKNOWN' END;
|
||||
|
||||
ALTER TABLE documents ALTER COLUMN meta_date_precision SET NOT NULL;
|
||||
|
||||
-- Fail-closed allowlist of the seven precision values (verbatim mirror of the
|
||||
-- normalizer's Precision enum). The DB enforces validity independent of the Java enum.
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_precision
|
||||
CHECK (meta_date_precision IN ('DAY', 'MONTH', 'SEASON', 'YEAR', 'RANGE', 'APPROX', 'UNKNOWN'));
|
||||
|
||||
-- A non-null range end is permitted only when precision = RANGE. A RANGE row MAY have a
|
||||
-- null end (open-ended range), so the rule is one-directional, not biconditional.
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_end_only_for_range
|
||||
CHECK (meta_date_end IS NULL OR meta_date_precision = 'RANGE');
|
||||
|
||||
-- For ranges with both endpoints, the end must not precede the start.
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_end_after_start
|
||||
CHECK (meta_date_end IS NULL OR meta_date IS NULL OR meta_date_end >= meta_date);
|
||||
|
||||
-- ─── persons: source_ref (import identity) + provisional flag ─────────────────
|
||||
|
||||
-- The normalizer person_id: join key for documents → persons and idempotency key for
|
||||
-- re-import. Nullable (manually created persons never have one); unique among non-nulls.
|
||||
ALTER TABLE persons ADD COLUMN source_ref varchar(255);
|
||||
CREATE UNIQUE INDEX idx_persons_source_ref ON persons (source_ref);
|
||||
|
||||
-- A provisional person is one the importer inferred but could not confidently identify.
|
||||
-- Stays false until Phase 3 (importer) sets it; no code path writes true in this phase.
|
||||
ALTER TABLE persons ADD COLUMN provisional boolean NOT NULL DEFAULT false;
|
||||
|
||||
-- ─── tag: source_ref (import identity, keyed on canonical tag_path) ───────────
|
||||
|
||||
ALTER TABLE tag ADD COLUMN source_ref varchar(255);
|
||||
CREATE UNIQUE INDEX idx_tag_source_ref ON tag (source_ref);
|
||||
@@ -479,6 +479,191 @@ class MigrationIntegrationTest {
|
||||
assertThat(count).isEqualTo(1);
|
||||
}
|
||||
|
||||
// ─── V69: import/precision/attribution/identity schema foundation ────────
|
||||
|
||||
@Test
|
||||
void v69_metaDatePrecisionColumn_isNotNull() {
|
||||
Integer count = jdbc.queryForObject(
|
||||
"""
|
||||
SELECT COUNT(*) FROM information_schema.columns
|
||||
WHERE table_schema = 'public'
|
||||
AND table_name = 'documents'
|
||||
AND column_name = 'meta_date_precision'
|
||||
AND is_nullable = 'NO'
|
||||
""",
|
||||
Integer.class);
|
||||
assertThat(count).isEqualTo(1);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_backfillSql_setsDatedRowsToDayPrecision() {
|
||||
// Re-run the migration's backfill UPDATE on a freshly dated row to prove the rule.
|
||||
UUID docId = createDocumentWithDate("1943-05-12");
|
||||
|
||||
jdbc.update(V69_BACKFILL_PRECISION_SQL);
|
||||
|
||||
String precision = jdbc.queryForObject(
|
||||
"SELECT meta_date_precision FROM documents WHERE id = ?", String.class, docId);
|
||||
assertThat(precision).isEqualTo("DAY");
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_backfillSql_setsUndatedRowsToUnknownPrecision() {
|
||||
UUID docId = createDocument(); // no meta_date
|
||||
|
||||
jdbc.update(V69_BACKFILL_PRECISION_SQL);
|
||||
|
||||
String precision = jdbc.queryForObject(
|
||||
"SELECT meta_date_precision FROM documents WHERE id = ?", String.class, docId);
|
||||
assertThat(precision).isEqualTo("UNKNOWN");
|
||||
}
|
||||
|
||||
// Mirrors the backfill UPDATE shipped in V69; idempotent for verification.
|
||||
private static final String V69_BACKFILL_PRECISION_SQL = """
|
||||
UPDATE documents
|
||||
SET meta_date_precision = CASE WHEN meta_date IS NOT NULL THEN 'DAY' ELSE 'UNKNOWN' END
|
||||
""";
|
||||
|
||||
@Test
|
||||
void v69_precisionCheck_rejectsValueOutsideEnum() {
|
||||
UUID docId = createDocument();
|
||||
|
||||
assertThatThrownBy(() ->
|
||||
jdbc.update("UPDATE documents SET meta_date_precision = 'BOGUS' WHERE id = ?", docId)
|
||||
).isInstanceOf(DataIntegrityViolationException.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_metaDateEndCheck_rejectsNonNullEndWhenPrecisionNotRange() {
|
||||
UUID docId = createDocumentWithDate("1943-05-12"); // precision DAY
|
||||
|
||||
assertThatThrownBy(() ->
|
||||
jdbc.update("UPDATE documents SET meta_date_end = '1943-06-01' WHERE id = ?", docId)
|
||||
).isInstanceOf(DataIntegrityViolationException.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_metaDateEndCheck_allowsNonNullEndWhenPrecisionRange() {
|
||||
UUID docId = createDocumentWithDate("1943-05-12");
|
||||
|
||||
int rows = jdbc.update(
|
||||
"UPDATE documents SET meta_date_precision = 'RANGE', meta_date_end = '1943-06-01' WHERE id = ?",
|
||||
docId);
|
||||
assertThat(rows).isEqualTo(1);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_metaDateEndCheck_allowsRangeWithNullEnd() {
|
||||
// Loose semantics: the normalizer may emit an open-ended RANGE (start only).
|
||||
UUID docId = createDocumentWithDate("1943-05-12");
|
||||
|
||||
int rows = jdbc.update(
|
||||
"UPDATE documents SET meta_date_precision = 'RANGE' WHERE id = ?", docId);
|
||||
assertThat(rows).isEqualTo(1);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_metaDateEndCheck_allowsRangeWithBothEndpointsNull() {
|
||||
// Fully-open RANGE: neither start (meta_date) nor end (meta_date_end) is set.
|
||||
// Both CHECKs hold (end IS NULL passes chk_meta_date_end_only_for_range; both-null
|
||||
// passes chk_meta_date_end_after_start), so the row survives. This locks the actual
|
||||
// DB behavior so a future tightening to a biconditional rule is a deliberate change.
|
||||
UUID docId = createDocument(); // null meta_date
|
||||
|
||||
int rows = jdbc.update(
|
||||
"UPDATE documents SET meta_date_precision = 'RANGE' WHERE id = ?", docId);
|
||||
assertThat(rows).isEqualTo(1);
|
||||
|
||||
Object metaDate = jdbc.queryForObject("SELECT meta_date FROM documents WHERE id = ?", Object.class, docId);
|
||||
Object metaDateEnd = jdbc.queryForObject(
|
||||
"SELECT meta_date_end FROM documents WHERE id = ?", Object.class, docId);
|
||||
assertThat(metaDate).isNull();
|
||||
assertThat(metaDateEnd).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_rangeOrderCheck_rejectsEndBeforeStart() {
|
||||
UUID docId = createDocumentWithDate("1943-05-12");
|
||||
|
||||
assertThatThrownBy(() ->
|
||||
jdbc.update(
|
||||
"UPDATE documents SET meta_date_precision = 'RANGE', meta_date_end = '1943-01-01' WHERE id = ?",
|
||||
docId)
|
||||
).isInstanceOf(DataIntegrityViolationException.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_metaDateRawCheck_rejectsOverlongText() {
|
||||
UUID docId = createDocument();
|
||||
String tooLong = "x".repeat(10001);
|
||||
|
||||
assertThatThrownBy(() ->
|
||||
jdbc.update("UPDATE documents SET meta_date_raw = ? WHERE id = ?", tooLong, docId)
|
||||
).isInstanceOf(DataIntegrityViolationException.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_senderTextAndReceiverText_storeRawAttribution() {
|
||||
UUID docId = createDocument();
|
||||
|
||||
int rows = jdbc.update(
|
||||
"UPDATE documents SET sender_text = 'Oma Anna', receiver_text = 'Tante Grete' WHERE id = ?",
|
||||
docId);
|
||||
assertThat(rows).isEqualTo(1);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Transactional(propagation = Propagation.NOT_SUPPORTED)
|
||||
void v69_personsSourceRef_uniqueIndexRejectsDuplicate() {
|
||||
jdbc.update(
|
||||
"INSERT INTO persons (id, last_name, source_ref) VALUES (gen_random_uuid(), 'A', 'person:dup')");
|
||||
try {
|
||||
assertThatThrownBy(() ->
|
||||
jdbc.update(
|
||||
"INSERT INTO persons (id, last_name, source_ref) VALUES (gen_random_uuid(), 'B', 'person:dup')")
|
||||
).isInstanceOf(DataIntegrityViolationException.class);
|
||||
} finally {
|
||||
jdbc.update("DELETE FROM persons WHERE source_ref = 'person:dup'");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@Transactional(propagation = Propagation.NOT_SUPPORTED)
|
||||
void v69_personsSourceRef_allowsMultipleNulls() {
|
||||
UUID a = createPerson("Null", "RefA");
|
||||
UUID b = createPerson("Null", "RefB");
|
||||
try {
|
||||
String refA = jdbc.queryForObject("SELECT source_ref FROM persons WHERE id = ?", String.class, a);
|
||||
String refB = jdbc.queryForObject("SELECT source_ref FROM persons WHERE id = ?", String.class, b);
|
||||
assertThat(refA).isNull();
|
||||
assertThat(refB).isNull();
|
||||
} finally {
|
||||
jdbc.update("DELETE FROM persons WHERE id IN (?, ?)", a, b);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void v69_personsProvisional_defaultsToFalse() {
|
||||
UUID id = createPerson("Provisional", "Default");
|
||||
|
||||
Boolean provisional = jdbc.queryForObject(
|
||||
"SELECT provisional FROM persons WHERE id = ?", Boolean.class, id);
|
||||
assertThat(provisional).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
@Transactional(propagation = Propagation.NOT_SUPPORTED)
|
||||
void v69_tagSourceRef_uniqueIndexRejectsDuplicate() {
|
||||
jdbc.update("INSERT INTO tag (id, name, source_ref) VALUES (gen_random_uuid(), 'TagDupA', 'tag:dup')");
|
||||
try {
|
||||
assertThatThrownBy(() ->
|
||||
jdbc.update("INSERT INTO tag (id, name, source_ref) VALUES (gen_random_uuid(), 'TagDupB', 'tag:dup')")
|
||||
).isInstanceOf(DataIntegrityViolationException.class);
|
||||
} finally {
|
||||
jdbc.update("DELETE FROM tag WHERE source_ref = 'tag:dup'");
|
||||
}
|
||||
}
|
||||
|
||||
// ─── helpers ─────────────────────────────────────────────────────────────
|
||||
|
||||
private UUID createPerson(String firstName, String lastName) {
|
||||
@@ -504,6 +689,12 @@ class MigrationIntegrationTest {
|
||||
return doc.getId();
|
||||
}
|
||||
|
||||
private UUID createDocumentWithDate(String isoDate) {
|
||||
UUID id = createDocument();
|
||||
jdbc.update("UPDATE documents SET meta_date = ?::date WHERE id = ?", isoDate, id);
|
||||
return id;
|
||||
}
|
||||
|
||||
private UUID insertAnnotation(UUID docId) {
|
||||
UUID id = UUID.randomUUID();
|
||||
jdbc.update("""
|
||||
|
||||
@@ -133,7 +133,8 @@ class DocumentControllerTest {
|
||||
"Er schrieb einen langen Brief", List.of(), false, List.of(), List.of(), List.of(), null, List.of());
|
||||
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
|
||||
.thenReturn(DocumentSearchResult.of(List.of(new DocumentListItem(
|
||||
docId, "Brief an Anna", "brief.pdf", null, null, null,
|
||||
docId, "Brief an Anna", "brief.pdf", null, null,
|
||||
DatePrecision.UNKNOWN, null, null,
|
||||
List.of(), List.of(), null, null, null, null,
|
||||
0, List.of(), matchData))));
|
||||
|
||||
@@ -151,7 +152,8 @@ class DocumentControllerTest {
|
||||
var matchData = new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of(), null, List.of());
|
||||
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
|
||||
.thenReturn(DocumentSearchResult.of(List.of(new DocumentListItem(
|
||||
docId, "Brief an Anna", "brief.pdf", null, null, null,
|
||||
docId, "Brief an Anna", "brief.pdf", null, null,
|
||||
DatePrecision.UNKNOWN, null, null,
|
||||
List.of(), List.of(), null, null, null, null,
|
||||
0, List.of(), matchData))));
|
||||
|
||||
|
||||
@@ -81,6 +81,28 @@ class DocumentListItemIntegrationTest {
|
||||
assertThat(item.title()).isEqualTo("Kurrent Brief");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_listItem_carriesMetaDatePrecisionAndEnd() {
|
||||
documentRepository.save(Document.builder()
|
||||
.title("Range Brief")
|
||||
.originalFilename("range.pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.documentDate(java.time.LocalDate.of(1943, 1, 1))
|
||||
.metaDatePrecision(DatePrecision.RANGE)
|
||||
.metaDateEnd(java.time.LocalDate.of(1943, 12, 31))
|
||||
.build());
|
||||
|
||||
DocumentSearchResult result = documentService.searchDocuments(
|
||||
null, null, null, null, null, null, null, null,
|
||||
DocumentSort.DATE, "DESC", null,
|
||||
PageRequest.of(0, 50));
|
||||
|
||||
DocumentListItem item = result.items().stream()
|
||||
.filter(i -> i.title().equals("Range Brief")).findFirst().orElseThrow();
|
||||
assertThat(item.metaDatePrecision()).isEqualTo(DatePrecision.RANGE);
|
||||
assertThat(item.metaDateEnd()).isEqualTo(java.time.LocalDate.of(1943, 12, 31));
|
||||
}
|
||||
|
||||
@Test
|
||||
void detail_stillReturnsTrainingLabels() {
|
||||
Document saved = documentRepository.save(Document.builder()
|
||||
|
||||
@@ -14,7 +14,8 @@ class DocumentSearchResultTest {
|
||||
|
||||
private DocumentListItem item(UUID docId) {
|
||||
return new DocumentListItem(
|
||||
docId, "Test", "test.pdf", null, null, null,
|
||||
docId, "Test", "test.pdf", null, null,
|
||||
DatePrecision.UNKNOWN, null, null,
|
||||
List.of(), List.of(), null, null, null, null,
|
||||
0, List.of(), SearchMatchData.empty());
|
||||
}
|
||||
@@ -64,7 +65,8 @@ class DocumentSearchResultTest {
|
||||
UUID id = UUID.randomUUID();
|
||||
ActivityActorDTO actor = new ActivityActorDTO("AB", "#f00", "Anna Braun");
|
||||
DocumentListItem item = new DocumentListItem(
|
||||
id, "T", "t.pdf", null, null, null,
|
||||
id, "T", "t.pdf", null, null,
|
||||
DatePrecision.UNKNOWN, null, null,
|
||||
List.of(), List.of(), null, null, null, null,
|
||||
75, List.of(actor), SearchMatchData.empty());
|
||||
|
||||
|
||||
@@ -117,6 +117,7 @@ class PersonControllerTest {
|
||||
public Integer getDeathYear() { return null; }
|
||||
public String getNotes() { return null; }
|
||||
public boolean isFamilyMember() { return false; }
|
||||
public boolean isProvisional() { return false; }
|
||||
public long getDocumentCount() { return 0; }
|
||||
};
|
||||
}
|
||||
|
||||
@@ -463,4 +463,46 @@ class PersonRepositoryTest {
|
||||
assertThat(result).hasSize(1);
|
||||
assertThat(result.get(0).getLastName()).isEqualTo("Gesellschafter des Verlages");
|
||||
}
|
||||
|
||||
// ─── #671: provisional must be SELECTed in all three native projections ───
|
||||
// Adding isProvisional() to the interface compiles even if a native query forgets
|
||||
// to SELECT p.provisional — it then silently returns false. These tests are the only
|
||||
// guard against that trap, so they must run against real Postgres.
|
||||
|
||||
@Test
|
||||
void findAllWithDocumentCount_projectsProvisionalTrue() {
|
||||
personRepository.save(Person.builder()
|
||||
.firstName("Inferred").lastName("Person").provisional(true).build());
|
||||
|
||||
List<PersonSummaryDTO> result = personRepository.findAllWithDocumentCount();
|
||||
|
||||
assertThat(result).anyMatch(PersonSummaryDTO::isProvisional);
|
||||
}
|
||||
|
||||
@Test
|
||||
void searchWithDocumentCount_projectsProvisionalTrue() {
|
||||
personRepository.save(Person.builder()
|
||||
.firstName("Provisorisch").lastName("Müller").provisional(true).build());
|
||||
|
||||
List<PersonSummaryDTO> result = personRepository.searchWithDocumentCount("Provisorisch");
|
||||
|
||||
assertThat(result).hasSize(1);
|
||||
assertThat(result.get(0).isProvisional()).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void findTopByDocumentCount_projectsProvisionalTrue() {
|
||||
Person provisional = personRepository.save(Person.builder()
|
||||
.firstName("Top").lastName("Provisional").provisional(true).build());
|
||||
documentRepository.save(Document.builder()
|
||||
.title("Brief").originalFilename("b.pdf")
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.sender(provisional).build());
|
||||
|
||||
List<PersonSummaryDTO> result = personRepository.findTopByDocumentCount(10);
|
||||
|
||||
PersonSummaryDTO summary = result.stream()
|
||||
.filter(p -> p.getId().equals(provisional.getId())).findFirst().orElseThrow();
|
||||
assertThat(summary.isProvisional()).isTrue();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,6 +25,11 @@ _Not to be confused with [AppUser](#appuser-appuser)_ — `Person` is a historic
|
||||
|
||||
**UserGroup** (`UserGroup`) — a named permission bundle assigned to one or more `AppUser`s. A user's effective permissions are the union of all permissions across all groups they belong to.
|
||||
|
||||
**source_ref** (`Person.sourceRef`, `Tag.sourceRef`) — the import normalizer's stable identity for a `Person` (its `person_id`) or `Tag` (its canonical `tag_path`). It is the join key linking normalized records to documents and the idempotency key for re-import; null for manually created records and unique among non-null values.
|
||||
|
||||
**provisional person** (`Person.provisional`) — a `Person` the importer inferred from raw attribution text but could not confidently match to a known individual. The flag lets the persons directory surface uncertainty honestly rather than fabricate a confident identity; it defaults to `false` and is set `true` only by the importer.
|
||||
_Not to be confused with `family_member`_ — `provisional` expresses import confidence, while `family_member` is a genealogical fact about whether the person belongs to the family tree.
|
||||
|
||||
---
|
||||
|
||||
## Document-Related Terms
|
||||
@@ -36,6 +41,10 @@ _See also [TranscriptionBlock](#transcriptionblock-transcriptionblock)._
|
||||
|
||||
**Document** (`Document`) — a single archival item (letter, postcard, photograph) with a file stored in MinIO/S3 and associated metadata (sender, receivers, date, tags, transcription blocks).
|
||||
|
||||
**date precision** (`Document.metaDatePrecision`, enum `DatePrecision`) — how exactly a document's date is known, one of `DAY, MONTH, SEASON, YEAR, RANGE, APPROX, UNKNOWN`. A verbatim mirror of the import normalizer's `Precision` enum so honest dates can be rendered (`APPROX` → "ca.", `RANGE` uses `meta_date_end`) instead of fabricating a false `DAY`-level date. `UNKNOWN` is the explicit value for undated documents.
|
||||
|
||||
**raw attribution** (`Document.senderText`, `Document.receiverText`, `Document.metaDateRaw`) — the original spreadsheet cell text for a document's sender, receiver, and date, preserved verbatim even after a `Person` or normalized date is linked. It keeps provenance intact and enables an "as written in the original" view.
|
||||
|
||||
**DocumentVersion** (`DocumentVersion`) — an append-only snapshot of a `Document`'s metadata at a point in time. Append-only by convention; no consumer-facing create or update endpoint exists. The entity uses Lombok `@Data` (which generates setters), so immutability is enforced by application convention, not at the Java level.
|
||||
|
||||
**Tag** (`Tag`) — a hierarchical category that can be applied to `Document`s. Tags are self-referencing via a `parent_id` foreign key, forming a tree structure.
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
# ADR-025 — Canonical Import Output as Contract & Single-Migration Schema Foundation
|
||||
|
||||
**Date:** 2026-05-27
|
||||
**Status:** Accepted
|
||||
**Issue:** #671
|
||||
**Milestone:** Handling the Unknowns — honest uncertainty in dates & people
|
||||
|
||||
---
|
||||
|
||||
## Context
|
||||
|
||||
The "Handling the Unknowns" milestone introduces honest uncertainty into the archive:
|
||||
documents whose dates are known only approximately or as a range, and people the importer
|
||||
infers from raw attribution text but cannot confidently identify. Three sibling issues —
|
||||
date precision (#666), name triage (#665), and the importer (#669) — each independently
|
||||
planned a Flyway `V69` migration that altered `persons`. Three `V69`s is a boot failure
|
||||
(Flyway versions must be unique), and `persons.provisional` was at risk of being defined
|
||||
twice.
|
||||
|
||||
Two durable decisions had to be made before any application code in Phases 3–6 could
|
||||
compile against the new schema.
|
||||
|
||||
---
|
||||
|
||||
## Decision
|
||||
|
||||
### 1. All import/precision/attribution/identity schema lives in ONE migration with a single owner
|
||||
|
||||
`V69__import_precision_attribution_identity_schema.sql` adds every new column for this
|
||||
milestone in a single, atomic, forward-only migration:
|
||||
|
||||
- `documents`: `meta_date_precision` (backfilled `DAY` where dated / `UNKNOWN` where not,
|
||||
then `NOT NULL`), `meta_date_end`, `meta_date_raw`, `sender_text`, `receiver_text`.
|
||||
- `persons`: `source_ref` (unique index, nullable), `provisional` (`NOT NULL DEFAULT false`).
|
||||
- `tag`: `source_ref` (unique index, nullable).
|
||||
|
||||
Integrity is pushed to the database as fail-closed `CHECK` constraints (the precedent is
|
||||
`V22`'s `person_type` allowlist):
|
||||
|
||||
- `meta_date_precision` must be one of the seven enum values.
|
||||
- `meta_date_end` may be non-null **only** when precision = `RANGE` (one-directional, not
|
||||
biconditional — see Consequences).
|
||||
- `meta_date_end >= meta_date` for ranges with both endpoints (a `CHECK`, not a trigger).
|
||||
- `meta_date_raw`, `sender_text`, `receiver_text` are length-capped at 10 000 (mirrors the
|
||||
`transcription_blocks` cap in `V18`).
|
||||
|
||||
No sibling issue adds another migration that alters `persons` or `documents` in this
|
||||
milestone.
|
||||
|
||||
### 2. The backend `DatePrecision` enum is a verbatim mirror of the normalizer's `Precision`; the canonical output is the contract
|
||||
|
||||
The importer reads the Python normalizer's canonical output
|
||||
(`tools/import-normalizer/`). The backend `DatePrecision` enum
|
||||
(`DAY, MONTH, SEASON, YEAR, RANGE, APPROX, UNKNOWN`) is a verbatim copy of the normalizer's
|
||||
`Precision(StrEnum)` (`dates.py`). There is **no translation layer**: the normalizer's
|
||||
output strings are persisted as-is. The same applies to `source_ref`, which carries the
|
||||
normalizer's `person_id` / canonical `tag_path` unchanged as the re-import idempotency key.
|
||||
|
||||
---
|
||||
|
||||
## Consequences
|
||||
|
||||
- **RANGE is one-directional, not biconditional.** A `RANGE` row may have a null
|
||||
`meta_date_end` (an open-ended range with only a start), because the normalizer can emit
|
||||
start-only ranges. A biconditional `RANGE ⟺ end IS NOT NULL` rule would reject valid
|
||||
normalizer output, so it was rejected. Phase 4 rendering must handle a `RANGE` with no end
|
||||
gracefully.
|
||||
- **`provisional` stays `false` throughout this phase.** The column and flag exist, but no
|
||||
code path sets it `true`; the importer (Phase 3) is the only writer. This is intentional,
|
||||
not a half-built feature.
|
||||
- **A future dev must not "improve" the enum.** Renaming or dropping a `DatePrecision` value
|
||||
without changing the normalizer silently breaks import idempotency and date rendering. The
|
||||
enum's Javadoc states this; the DB `CHECK` enforces validity independent of the Java enum.
|
||||
- **`source_ref` is unique + nullable.** Manually created persons/tags have `source_ref =
|
||||
NULL`; Postgres allows multiple NULLs under a plain unique index, so no backfill is needed.
|
||||
- **Forward-only.** The migration is immutable once shipped (Flyway checksum model); any fix
|
||||
goes in a later version. There is no down-migration — rollback means restoring from the
|
||||
nightly `pg_dump`, the standard procedure.
|
||||
- **`PersonSummaryDTO` coupling.** `provisional` was added to the `PersonSummaryDTO` native
|
||||
interface projection; because the projection is backed by native SQL, the column had to be
|
||||
added to all three native `SELECT`s (`findAllWithDocumentCount`, `searchWithDocumentCount`,
|
||||
`findTopByDocumentCount`) or it would silently return `false`. Guarded by integration tests
|
||||
against real Postgres.
|
||||
@@ -1,6 +1,6 @@
|
||||
@startuml db-orm
|
||||
' Schema source: Flyway V1–V60 (excl. V37, V43 — intentionally removed)
|
||||
' Schema as of: V60 (2026-05-06)
|
||||
' Schema source: Flyway V1–V69 (excl. V37, V43 — intentionally removed)
|
||||
' Schema as of: V69 (2026-05-27)
|
||||
' ⚠ This is a versioned snapshot. Update when the schema changes significantly.
|
||||
|
||||
hide circle
|
||||
@@ -88,6 +88,11 @@ package "Documents" {
|
||||
summary : TEXT
|
||||
transcription : TEXT
|
||||
meta_date : DATE
|
||||
meta_date_precision : VARCHAR(16) NOT NULL
|
||||
meta_date_end : DATE
|
||||
meta_date_raw : TEXT
|
||||
sender_text : TEXT
|
||||
receiver_text : TEXT
|
||||
meta_location : VARCHAR(255)
|
||||
meta_document_location : VARCHAR(255)
|
||||
archive_box : VARCHAR(255)
|
||||
@@ -182,6 +187,8 @@ package "Persons" {
|
||||
birth_year : INTEGER
|
||||
death_year : INTEGER
|
||||
family_member : BOOLEAN NOT NULL
|
||||
source_ref : VARCHAR(255) UNIQUE
|
||||
provisional : BOOLEAN NOT NULL
|
||||
}
|
||||
|
||||
entity person_name_aliases {
|
||||
@@ -217,6 +224,7 @@ package "Tags" {
|
||||
name : VARCHAR(255) NOT NULL UNIQUE
|
||||
parent_id : UUID <<FK>>
|
||||
color : VARCHAR(20)
|
||||
source_ref : VARCHAR(255) UNIQUE
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
@startuml db-relationships
|
||||
' Schema source: Flyway V1–V60 (excl. V37, V43 — intentionally removed)
|
||||
' Schema as of: V60 (2026-05-06)
|
||||
' Schema source: Flyway V1–V69 (excl. V37, V43 — intentionally removed)
|
||||
' Schema as of: V69 (2026-05-27)
|
||||
' ⚠ This is a versioned snapshot. Update when the schema changes significantly.
|
||||
' Note: V69 adds columns only (persons.source_ref, tag.source_ref, document
|
||||
' precision/attribution fields); no new FK relationships, so this diagram is unchanged.
|
||||
|
||||
hide circle
|
||||
skinparam linetype ortho
|
||||
|
||||
@@ -1636,6 +1636,7 @@ export interface components {
|
||||
/** Format: uuid */
|
||||
parentId?: string;
|
||||
color?: string;
|
||||
sourceRef?: string;
|
||||
};
|
||||
PersonUpdateDTO: {
|
||||
/** @enum {string} */
|
||||
@@ -1665,12 +1666,21 @@ export interface components {
|
||||
/** Format: int32 */
|
||||
deathYear?: number;
|
||||
familyMember: boolean;
|
||||
sourceRef?: string;
|
||||
provisional: boolean;
|
||||
readonly displayName: string;
|
||||
};
|
||||
DocumentUpdateDTO: {
|
||||
title?: string;
|
||||
/** Format: date */
|
||||
documentDate?: string;
|
||||
/** @enum {string} */
|
||||
metaDatePrecision?: "DAY" | "MONTH" | "SEASON" | "YEAR" | "RANGE" | "APPROX" | "UNKNOWN";
|
||||
/** Format: date */
|
||||
metaDateEnd?: string;
|
||||
metaDateRaw?: string;
|
||||
senderText?: string;
|
||||
receiverText?: string;
|
||||
location?: string;
|
||||
documentLocation?: string;
|
||||
archiveBox?: string;
|
||||
@@ -1704,6 +1714,13 @@ export interface components {
|
||||
status: "PLACEHOLDER" | "UPLOADED" | "TRANSCRIBED" | "REVIEWED" | "ARCHIVED";
|
||||
/** Format: date */
|
||||
documentDate?: string;
|
||||
/** @enum {string} */
|
||||
metaDatePrecision: "DAY" | "MONTH" | "SEASON" | "YEAR" | "RANGE" | "APPROX" | "UNKNOWN";
|
||||
/** Format: date */
|
||||
metaDateEnd?: string;
|
||||
metaDateRaw?: string;
|
||||
senderText?: string;
|
||||
receiverText?: string;
|
||||
location?: string;
|
||||
documentLocation?: string;
|
||||
archiveBox?: string;
|
||||
@@ -2024,6 +2041,10 @@ export interface components {
|
||||
receiverIds?: string[];
|
||||
/** Format: date */
|
||||
documentDate?: string;
|
||||
/** @enum {string} */
|
||||
metaDatePrecision?: "DAY" | "MONTH" | "SEASON" | "YEAR" | "RANGE" | "APPROX" | "UNKNOWN";
|
||||
/** Format: date */
|
||||
metaDateEnd?: string;
|
||||
location?: string;
|
||||
tagNames?: string[];
|
||||
metadataComplete?: boolean;
|
||||
@@ -2221,6 +2242,7 @@ export interface components {
|
||||
notes?: string;
|
||||
personType?: string;
|
||||
familyMember?: boolean;
|
||||
provisional?: boolean;
|
||||
};
|
||||
InferredRelationshipWithPersonDTO: {
|
||||
person: components["schemas"]["PersonNodeDTO"];
|
||||
@@ -2396,6 +2418,10 @@ export interface components {
|
||||
thumbnailUrl?: string;
|
||||
/** Format: date */
|
||||
documentDate?: string;
|
||||
/** @enum {string} */
|
||||
metaDatePrecision: "DAY" | "MONTH" | "SEASON" | "YEAR" | "RANGE" | "APPROX" | "UNKNOWN";
|
||||
/** Format: date */
|
||||
metaDateEnd?: string;
|
||||
sender?: components["schemas"]["Person"];
|
||||
receivers: components["schemas"]["Person"][];
|
||||
tags: components["schemas"]["Tag"][];
|
||||
|
||||
@@ -16,6 +16,7 @@ const baseDoc: Document = {
|
||||
title: 'Brief an Hans',
|
||||
originalFilename: 'brief.pdf',
|
||||
status: 'UPLOADED',
|
||||
metaDatePrecision: 'UNKNOWN',
|
||||
metadataComplete: true,
|
||||
scriptType: 'HANDWRITING_KURRENT',
|
||||
createdAt: '2025-01-01T12:00:00Z',
|
||||
@@ -127,7 +128,8 @@ describe('ReaderRecentDocs', () => {
|
||||
firstName: 'Anna',
|
||||
displayName: 'Anna Müller',
|
||||
personType: 'PERSON' as const,
|
||||
familyMember: false
|
||||
familyMember: false,
|
||||
provisional: false
|
||||
}
|
||||
};
|
||||
render(ReaderRecentDocs, { documents: [docWithSender] });
|
||||
|
||||
@@ -20,6 +20,7 @@ const makePerson = (id: string, name: string, overrides: Partial<Person> = {}):
|
||||
displayName: name,
|
||||
personType: 'PERSON',
|
||||
familyMember: false,
|
||||
provisional: false,
|
||||
...overrides
|
||||
};
|
||||
};
|
||||
|
||||
@@ -34,6 +34,7 @@ const AUGUSTE: Person = {
|
||||
displayName: 'Auguste Raddatz',
|
||||
personType: 'PERSON',
|
||||
familyMember: false,
|
||||
provisional: false,
|
||||
birthYear: 1882,
|
||||
deathYear: 1944
|
||||
};
|
||||
@@ -45,6 +46,7 @@ const ANNA: Person = {
|
||||
displayName: 'Anna Schmidt',
|
||||
personType: 'PERSON',
|
||||
familyMember: false,
|
||||
provisional: false,
|
||||
birthYear: 1860
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user