feat(lesereisen): implement lesereisen
All checks were successful
CI / Unit & Component Tests (push) Successful in 4m34s
CI / OCR Service Tests (push) Successful in 27s
CI / Backend Unit Tests (push) Successful in 5m1s
CI / fail2ban Regex (push) Successful in 47s
CI / Semgrep Security Scan (push) Successful in 23s
CI / Compose Bucket Idempotency (push) Successful in 1m11s

This commit was merged in pull request #787.
This commit is contained in:
2026-06-12 14:04:02 +02:00
parent 4bcf568ed4
commit b33d0eb850
142 changed files with 11643 additions and 917 deletions

View File

@@ -0,0 +1,73 @@
-- Production pre-requisite — run BEFORE applying this migration:
-- docker exec familienarchiv-db sh -c 'psql -U "$POSTGRES_USER" -d "$POSTGRES_DB" \
-- -c "SELECT COUNT(DISTINCT (geschichte_id, document_id)) FROM geschichten_documents;"'
-- docker exec familienarchiv-db sh -c 'pg_dump -U "$POSTGRES_USER" "$POSTGRES_DB" \
-- --table=geschichten_documents \
-- -f /tmp/pre_v72_backup_'"$(date +%Y%m%d)"'.sql'
-- Take the dump even if geschichten_documents is empty — it captures the table DEFINITION
-- for emergency reconstruction. The DROP TABLE is the only irreversible step; the
-- INSERT...SELECT is a no-op when there is no data. No DDL rollback path exists after commit.
--
-- REVERSE PROCEDURE (if V72 must be rolled back): restore the pre-V72 dump, then re-derive
-- the junction from the new table:
-- INSERT INTO geschichten_documents (geschichte_id, document_id)
-- SELECT geschichte_id, document_id FROM journey_items WHERE document_id IS NOT NULL;
-- Note: the reconstructed junction FK is ON DELETE CASCADE per the original V58
-- (NOT the new SET NULL of journey_items). Domain FKs target app_users (post-V60) —
-- do NOT hand-type V58's verbatim "REFERENCES users" DDL nor copy journey_items' SET NULL
-- into the reconstructed junction.
--
-- ASSUMPTION AS-001: The old geschichten_documents was an unordered Set — no curator order
-- existed. Ordering by meta_date is a plausible default a Lesereise lets curators
-- re-sequence. This is not a requirement; it is the best available approximation.
--
-- ASSUMPTION AS-002: Existing published Geschichten (STORYs) render the related-letters block;
-- this block visibly degrades to generic links (loss of per-document title AND date) for ALL
-- current readers during the stub window. Accepted because the reader follow-on is the
-- next-priority blocking dependency.
-- Step 1: Add type discriminator column to geschichten
ALTER TABLE geschichten
ADD COLUMN type VARCHAR(50) DEFAULT 'STORY' NOT NULL;
-- Step 2: Create journey_items table
CREATE TABLE journey_items (
id UUID NOT NULL DEFAULT gen_random_uuid(),
geschichte_id UUID NOT NULL,
position INT NOT NULL,
document_id UUID,
note TEXT,
CONSTRAINT pk_journey_items PRIMARY KEY (id),
CONSTRAINT fk_journey_items_geschichte
FOREIGN KEY (geschichte_id) REFERENCES geschichten(id) ON DELETE CASCADE,
CONSTRAINT fk_journey_items_document
FOREIGN KEY (document_id) REFERENCES documents(id) ON DELETE SET NULL,
CONSTRAINT chk_journey_item_not_empty
CHECK (document_id IS NOT NULL OR note IS NOT NULL)
);
-- Step 3: Index for ordered retrieval by geschichte + position
CREATE INDEX idx_journey_items_geschichte_position
ON journey_items (geschichte_id, position ASC);
-- Step 4: Migrate geschichten_documents → journey_items
-- Positions are multiples of 1000 (headroom for drag-reorder).
-- Ordered by meta_date ASC NULLS LAST, then documents.id ASC as deterministic tiebreaker.
-- SELECT DISTINCT guards against duplicate junction rows producing duplicate journey items.
INSERT INTO journey_items (id, geschichte_id, position, document_id)
SELECT
gen_random_uuid(),
gd.geschichte_id,
(ROW_NUMBER() OVER (
PARTITION BY gd.geschichte_id
ORDER BY d.meta_date ASC NULLS LAST, d.id ASC
) * 1000)::INT AS position,
gd.document_id
FROM (
SELECT DISTINCT geschichte_id, document_id
FROM geschichten_documents
) gd
LEFT JOIN documents d ON d.id = gd.document_id;
-- Step 5: Drop the old junction table (irreversible — take the pg_dump first)
DROP TABLE geschichten_documents;

View File

@@ -0,0 +1,19 @@
-- Adds the two constraints that V72 deferred:
-- 1. UNIQUE(geschichte_id, position) DEFERRABLE INITIALLY DEFERRED
-- Allows mid-transaction position swaps during reorder (checked at COMMIT, not per-row).
-- Requires transaction-level or session-level connection pooling (prod uses PgBouncer
-- in transaction mode — correct today; a future switch to statement-level would silently
-- break deferred checking at COMMIT).
-- 2. CHECK (position > 0) — defense against off-by-one in the append path.
--
-- MUST run in a single transaction; Flyway's default per-migration transaction satisfies this.
-- Do NOT add executeInTransaction=false or any callback that splits this migration.
ALTER TABLE journey_items
ADD CONSTRAINT uq_journey_items_geschichte_position
UNIQUE (geschichte_id, position)
DEFERRABLE INITIALLY DEFERRED;
ALTER TABLE journey_items
ADD CONSTRAINT chk_journey_item_position
CHECK (position > 0);

View File

@@ -0,0 +1,37 @@
-- Two constraints the service-level checks need as atomic backstops:
--
-- 1. Partial unique index on (geschichte_id, document_id): the append dedup
-- guard is a check-then-insert (existsByGeschichteIdAndDocumentId), so two
-- concurrent appends of the same document can both pass the pre-check.
-- The index rejects the second INSERT; JourneyItemService.append translates
-- the DataIntegrityViolationException into the same 409
-- JOURNEY_DOCUMENT_ALREADY_ADDED as the friendly pre-check.
-- Partial (WHERE document_id IS NOT NULL) — note-only interludes must not collide.
--
-- 2. CHECK on note length: mirrors chk_text_length on transcription_blocks.
-- 2000 is the spec'd limit — JourneyItemService.MAX_NOTE_LENGTH, the frontend
-- maxlength, and the i18n error message all agree (#793).
--
-- Defensive cleanup first: a database that served writes on the base branch
-- (no dedup guard, MAX_NOTE_LENGTH = 5000) can hold rows that would make the
-- DDL below fail mid-migration and boot-loop the backend on a failed Flyway
-- row. Both statements are no-ops on a clean database.
-- Keep the earliest-positioned row of each (geschichte, document) pair.
DELETE FROM journey_items a
USING journey_items b
WHERE a.geschichte_id = b.geschichte_id
AND a.document_id = b.document_id
AND a.document_id IS NOT NULL
AND a.position > b.position;
-- Clamp over-long notes written under the old 5000-char service limit.
UPDATE journey_items SET note = left(note, 2000) WHERE length(note) > 2000;
CREATE UNIQUE INDEX uq_journey_items_geschichte_document
ON journey_items (geschichte_id, document_id)
WHERE document_id IS NOT NULL;
ALTER TABLE journey_items
ADD CONSTRAINT chk_journey_item_note_length
CHECK (note IS NULL OR length(note) <= 2000);

View File

@@ -0,0 +1,16 @@
-- JOURNEY intros travel the verbatim (unsanitized) write path and get the same
-- three-layer bound as journey notes: frontend maxlength, the
-- GeschichteService.MAX_INTRO_LENGTH check, and this CHECK as the atomic backstop.
-- STORY bodies are sanitized Tiptap HTML and stay unbounded on purpose.
--
-- The title needs no CHECK here — VARCHAR(255) (V58) already bounds it at the
-- DB layer; the service-level check exists to turn that 500 into a friendly 400.
-- Defensive clamp first: intros written before this migration may exceed the
-- cap. No-op on a clean database.
UPDATE geschichten SET body = left(body, 4000)
WHERE type = 'JOURNEY' AND length(body) > 4000;
ALTER TABLE geschichten
ADD CONSTRAINT chk_geschichte_journey_intro_length
CHECK (type <> 'JOURNEY' OR body IS NULL OR length(body) <= 4000);