feat(fts): add search_vector column, GIN index, DB triggers, and FTS repository method (V34)
- V34 migration: adds a search_vector tsvector column with a GIN index
- BEFORE INSERT/UPDATE trigger on documents rebuilds the vector from title (A), summary + transcription_blocks.text (B), sender/receiver names (C), and tag names + location (D) using the German FTS config
- AFTER triggers on transcription_blocks, document_receivers, and document_tags touch the parent document row to re-fire the BEFORE UPDATE trigger
- DocumentRepository.findRankedIdsByFts(): native query using websearch_to_tsquery
- DocumentFtsTest: 12 integration tests covering stemming, trigger sync, ranking, stop words, malformed input, and receiver and tag search

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,74 @@
|
||||
-- ─── Full-Text Search: search_vector on documents ──────────────────────────────
-- Adds a tsvector column that aggregates title (A), summary + transcription
-- block text (B), sender/receiver names (C), and tag names + location (D).
-- The column is maintained by DB triggers so that the OCR pipeline (which writes
-- transcription_blocks directly) stays in sync without JPA @PreUpdate hooks.
|
||||
|
||||
-- 1. Storage for the search document, plus the index that serves queries on it.
--    The column is added without a default or NOT NULL constraint, so it is
--    NULL until a trigger on documents fills it in.
ALTER TABLE documents
    ADD COLUMN search_vector tsvector;

-- GIN index over the tsvector column.
CREATE INDEX idx_documents_search
    ON documents
    USING GIN (search_vector);
|
||||
|
||||
-- 2. Trigger function: rebuilds search_vector on documents INSERT or UPDATE.
--    Runs BEFORE the write so NEW.search_vector is set inline.
--
--    Weights assigned to each source:
--      A  NEW.title
--      B  NEW.summary, text of every transcription_blocks row of the document
--      C  sender name, names of every person in document_receivers
--      D  tag names, NEW.meta_location
--
--    All text is parsed with the 'german' text search configuration.
--    Person names are assembled with concat_ws(), which skips NULL parts, so
--    a person with only first_name or only last_name set is still indexed
--    (a plain "||" concatenation would turn the whole name into NULL).
--
--    Returns NEW with search_vector replaced; no other column is modified.
CREATE OR REPLACE FUNCTION fn_documents_fts_update() RETURNS trigger AS $$
DECLARE
    v_block_text     text;  -- all transcription text of the document
    v_sender_name    text;  -- "first last" of the sender, NULL if none
    v_receiver_names text;  -- space-separated names of all receivers
    v_tag_names      text;  -- space-separated names of all tags
BEGIN
    -- string_agg() ignores NULL inputs, so blocks without text are skipped.
    SELECT string_agg(tb.text, ' ')
      INTO v_block_text
      FROM transcription_blocks tb
     WHERE tb.document_id = NEW.id;

    -- Stays NULL when sender_id is NULL or matches no person.
    SELECT concat_ws(' ', p.first_name, p.last_name)
      INTO v_sender_name
      FROM persons p
     WHERE p.id = NEW.sender_id;

    SELECT string_agg(concat_ws(' ', p.first_name, p.last_name), ' ')
      INTO v_receiver_names
      FROM document_receivers dr
      INNER JOIN persons p ON p.id = dr.person_id
     WHERE dr.document_id = NEW.id;

    SELECT string_agg(t.name, ' ')
      INTO v_tag_names
      FROM document_tags dt
      INNER JOIN tag t ON t.id = dt.tag_id
     WHERE dt.document_id = NEW.id;

    -- coalesce(..., '') keeps a missing source from nulling the whole vector:
    -- to_tsvector(NULL) is NULL, and NULL || tsvector is NULL.
    NEW.search_vector :=
        setweight(to_tsvector('german', coalesce(NEW.title, '')),         'A') ||
        setweight(to_tsvector('german', coalesce(NEW.summary, '')),       'B') ||
        setweight(to_tsvector('german', coalesce(v_block_text, '')),      'B') ||
        setweight(to_tsvector('german', coalesce(v_sender_name, '')),     'C') ||
        setweight(to_tsvector('german', coalesce(v_receiver_names, '')),  'C') ||
        setweight(to_tsvector('german', coalesce(v_tag_names, '')),       'D') ||
        setweight(to_tsvector('german', coalesce(NEW.meta_location, '')), 'D');

    RETURN NEW;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Route every row written to documents through the rebuild function before
-- it is stored.
CREATE TRIGGER trg_documents_fts
    BEFORE INSERT OR UPDATE ON documents
    FOR EACH ROW EXECUTE FUNCTION fn_documents_fts_update();

-- Backfill: rows that existed before this migration still have a NULL
-- search_vector and would not match any search until their next write.
-- A no-op update sends each of them through the trigger once.
UPDATE documents
   SET title = title
 WHERE search_vector IS NULL;
|
||||
|
||||
-- 3. Rebuild trigger function for join tables and transcription_blocks.
--    These tables don't have a search_vector of their own; instead they
--    touch the parent document row ("SET title = title") to re-fire the
--    BEFORE UPDATE trigger on documents, which then recomputes the vector
--    with the current state of all joined tables.
--
--    Parent rows touched per operation:
--      INSERT  NEW.document_id
--      DELETE  OLD.document_id
--      UPDATE  NEW.document_id, and also OLD.document_id when the row was
--              moved to a different document (otherwise the previous parent
--              would keep the moved row's text in its vector)
--
--    Meant for AFTER ... FOR EACH ROW triggers on tables that have a
--    document_id column; the return value is ignored there, hence NULL.
--
--    NOTE(review): nothing in this migration re-touches documents when a
--    persons or tag row is renamed — confirm whether that is handled elsewhere.
CREATE OR REPLACE FUNCTION fn_rebuild_document_fts() RETURNS trigger AS $$
BEGIN
    IF TG_OP = 'DELETE' THEN
        UPDATE documents SET title = title WHERE id = OLD.document_id;
    ELSIF TG_OP = 'UPDATE' THEN
        UPDATE documents SET title = title WHERE id = NEW.document_id;
        -- IS DISTINCT FROM treats NULL as a comparable value, so a change
        -- to or from NULL is detected as well.
        IF OLD.document_id IS DISTINCT FROM NEW.document_id THEN
            UPDATE documents SET title = title WHERE id = OLD.document_id;
        END IF;
    ELSE
        -- INSERT
        UPDATE documents SET title = title WHERE id = NEW.document_id;
    END IF;

    RETURN NULL;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Any row-level change to transcription text refreshes the parent document.
CREATE TRIGGER trg_transcription_blocks_fts
    AFTER INSERT OR UPDATE OR DELETE
    ON transcription_blocks
    FOR EACH ROW
    EXECUTE FUNCTION fn_rebuild_document_fts();
|
||||
|
||||
-- Adding or removing a receiver refreshes the parent document. UPDATE is
-- covered as well so that a join row rewritten in place (e.g. person_id
-- changed by direct SQL) does not leave the document's vector stale.
CREATE TRIGGER trg_document_receivers_fts
    AFTER INSERT OR UPDATE OR DELETE ON document_receivers
    FOR EACH ROW EXECUTE FUNCTION fn_rebuild_document_fts();
|
||||
|
||||
-- Adding or removing a tag refreshes the parent document. UPDATE is covered
-- as well so that a join row rewritten in place (e.g. tag_id changed by
-- direct SQL) does not leave the document's vector stale.
CREATE TRIGGER trg_document_tags_fts
    AFTER INSERT OR UPDATE OR DELETE ON document_tags
    FOR EACH ROW EXECUTE FUNCTION fn_rebuild_document_fts();
|
||||
Reference in New Issue
Block a user