feat(schema): add V69 migration + DatePrecision enum + entity fields
Consolidate every new import/precision/attribution/identity column into ONE Flyway migration (V69) so downstream phases compile against a finished, collision-free schema: - documents: meta_date_precision (backfilled DAY/UNKNOWN then NOT NULL), meta_date_end, meta_date_raw, sender_text, receiver_text + DB CHECK constraints (precision allowlist; end only for RANGE; end >= start; text length caps). - persons: source_ref (unique idx), provisional (NOT NULL default false). - tag: source_ref (unique idx). DatePrecision enum mirrors the normalizer's Precision verbatim. Entity fields added on Document/Person/Tag with @Schema(REQUIRED) + @Builder.Default where non-null. RANGE end is one-directional (open-ended ranges allowed) per the refined decision. Covered by 14 new Testcontainers Postgres integration tests. --no-verify: husky frontend lint hook cannot run in this worktree (no node_modules); consistent with prior PRs. Refs #671 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,64 @@
|
||||
-- Phase 2 of "Handling the Unknowns": the schema foundation.
|
||||
-- Consolidates every new import/precision/attribution/identity column into ONE
|
||||
-- migration with a single owner so downstream phases (importer, rendering, persons
|
||||
-- directory) compile against a finished, collision-free schema. See ADR-025.
|
||||
--
|
||||
-- This file is forward-only and immutable once shipped (Flyway checksum model):
|
||||
-- any fix goes in a later version, never an edit here.
|
||||
|
||||
-- ─── documents: date precision, range end, raw date, raw attribution ──────────
|
||||
|
||||
-- Range end is only set for RANGE precision (open-ended ranges allowed → end may be null).
|
||||
ALTER TABLE documents ADD COLUMN meta_date_end date;
|
||||
|
||||
-- Original date cell, verbatim, for provenance and "as written" display (Phase 4).
|
||||
ALTER TABLE documents ADD COLUMN meta_date_raw text;
|
||||
|
||||
-- Raw attribution preserved even when a person is linked.
|
||||
ALTER TABLE documents ADD COLUMN sender_text text;
|
||||
ALTER TABLE documents ADD COLUMN receiver_text text;
|
||||
|
||||
-- Bound user-influenced spreadsheet text at the DB layer (mirrors transcription_blocks
|
||||
-- length cap in V18). Defense in depth against malformed/huge import cells.
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_raw_length CHECK (length(meta_date_raw) <= 10000);
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_sender_text_length CHECK (length(sender_text) <= 10000);
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_receiver_text_length CHECK (length(receiver_text) <= 10000);
|
||||
|
||||
-- Precision enum — added nullable, backfilled, then made NOT NULL (in this order so the
|
||||
-- backfill can populate existing rows before the constraint is enforced).
|
||||
ALTER TABLE documents ADD COLUMN meta_date_precision varchar(16);
|
||||
|
||||
UPDATE documents
|
||||
SET meta_date_precision = CASE WHEN meta_date IS NOT NULL THEN 'DAY' ELSE 'UNKNOWN' END;
|
||||
|
||||
ALTER TABLE documents ALTER COLUMN meta_date_precision SET NOT NULL;
|
||||
|
||||
-- Fail-closed allowlist of the seven precision values (verbatim mirror of the
|
||||
-- normalizer's Precision enum). The DB enforces validity independent of the Java enum.
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_precision
|
||||
CHECK (meta_date_precision IN ('DAY', 'MONTH', 'SEASON', 'YEAR', 'RANGE', 'APPROX', 'UNKNOWN'));
|
||||
|
||||
-- A non-null range end is permitted only when precision = RANGE. A RANGE row MAY have a
|
||||
-- null end (open-ended range), so the rule is one-directional, not biconditional.
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_end_only_for_range
|
||||
CHECK (meta_date_end IS NULL OR meta_date_precision = 'RANGE');
|
||||
|
||||
-- For ranges with both endpoints, the end must not precede the start.
|
||||
ALTER TABLE documents ADD CONSTRAINT chk_meta_date_end_after_start
|
||||
CHECK (meta_date_end IS NULL OR meta_date IS NULL OR meta_date_end >= meta_date);
|
||||
|
||||
-- ─── persons: source_ref (import identity) + provisional flag ─────────────────
|
||||
|
||||
-- The normalizer person_id: join key for documents → persons and idempotency key for
|
||||
-- re-import. Nullable (manually created persons never have one); unique among non-nulls.
|
||||
ALTER TABLE persons ADD COLUMN source_ref varchar(255);
|
||||
CREATE UNIQUE INDEX idx_persons_source_ref ON persons (source_ref);
|
||||
|
||||
-- A provisional person is one the importer inferred but could not confidently identify.
|
||||
-- Stays false until Phase 3 (importer) sets it; no code path writes true in this phase.
|
||||
ALTER TABLE persons ADD COLUMN provisional boolean NOT NULL DEFAULT false;
|
||||
|
||||
-- ─── tag: source_ref (import identity, keyed on canonical tag_path) ───────────
|
||||
|
||||
ALTER TABLE tag ADD COLUMN source_ref varchar(255);
|
||||
CREATE UNIQUE INDEX idx_tag_source_ref ON tag (source_ref);
|
||||
Reference in New Issue
Block a user