From 24530cf85b9ec1c306b542e0eb9abcce5f197d5f Mon Sep 17 00:00:00 2001 From: Marcel Date: Tue, 14 Apr 2026 23:38:12 +0200 Subject: [PATCH 1/8] feat(fts): add search_vector column, GIN index, DB triggers, and FTS repository method (V34) - V34 migration: adds search_vector tsvector column with GIN index - BEFORE INSERT/UPDATE trigger on documents rebuilds vector from title (A), summary + transcription_blocks.text (B), sender/receiver names (C), tag names + location (D) using german FTS config - AFTER triggers on transcription_blocks, document_receivers, document_tags touch the parent document row to re-fire the BEFORE UPDATE trigger - DocumentRepository.findRankedIdsByFts() native query using websearch_to_tsquery - DocumentFtsTest: 12 integration tests covering stemming, trigger sync, ranking, stop words, malformed input, receiver and tag search Co-Authored-By: Claude Sonnet 4.6 --- .../repository/DocumentRepository.java | 8 + .../migration/V34__add_fts_search_vector.sql | 74 ++++++ .../repository/DocumentFtsTest.java | 244 ++++++++++++++++++ 3 files changed, 326 insertions(+) create mode 100644 backend/src/main/resources/db/migration/V34__add_fts_search_vector.sql create mode 100644 backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentFtsTest.java diff --git a/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java b/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java index ca5a88d4..3a183ded 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java @@ -81,4 +81,12 @@ public interface DocumentRepository extends JpaRepository, JpaSp @Param("to") LocalDate to, Sort sort); + @Query(nativeQuery = true, value = """ + SELECT d.id FROM documents d + WHERE d.search_vector @@ websearch_to_tsquery('german', :query) + ORDER BY ts_rank(d.search_vector, 
websearch_to_tsquery('german', :query)) DESC, + d.meta_date DESC NULLS LAST + """) + List findRankedIdsByFts(@Param("query") String query); + } \ No newline at end of file diff --git a/backend/src/main/resources/db/migration/V34__add_fts_search_vector.sql b/backend/src/main/resources/db/migration/V34__add_fts_search_vector.sql new file mode 100644 index 00000000..a362037a --- /dev/null +++ b/backend/src/main/resources/db/migration/V34__add_fts_search_vector.sql @@ -0,0 +1,74 @@ +-- ─── Full-Text Search: search_vector on documents ────────────────────────────── +-- Adds a tsvector column that aggregates: title (A), summary + transcription +-- block text (B), sender/receiver names (C), tag names + location (D). +-- The column is maintained by DB triggers so the OCR pipeline (which writes +-- transcription_blocks directly) stays in sync without JPA @PreUpdate hooks. + +-- 1. Column and GIN index +ALTER TABLE documents ADD COLUMN search_vector tsvector; +CREATE INDEX idx_documents_search ON documents USING GIN (search_vector); + +-- 2. Trigger function: rebuilds search_vector on documents INSERT or UPDATE. +-- Runs BEFORE the write so NEW.search_vector is set inline. 
+CREATE OR REPLACE FUNCTION fn_documents_fts_update() RETURNS trigger AS $$ +BEGIN + NEW.search_vector := + setweight(to_tsvector('german', coalesce(NEW.title, '')), 'A') || + setweight(to_tsvector('german', coalesce(NEW.summary, '')), 'B') || + setweight(to_tsvector('german', coalesce(( + SELECT string_agg(tb.text, ' ') FILTER (WHERE tb.text IS NOT NULL) + FROM transcription_blocks tb + WHERE tb.document_id = NEW.id + ), '')), 'B') || + setweight(to_tsvector('german', coalesce(( + SELECT coalesce(p.first_name, '') || ' ' || p.last_name + FROM persons p + WHERE p.id = NEW.sender_id + ), '')), 'C') || + setweight(to_tsvector('german', coalesce(( + SELECT string_agg(coalesce(p.first_name, '') || ' ' || p.last_name, ' ') + FROM document_receivers dr + JOIN persons p ON p.id = dr.person_id + WHERE dr.document_id = NEW.id + ), '')), 'C') || + setweight(to_tsvector('german', coalesce(( + SELECT string_agg(t.name, ' ') + FROM document_tags dt + JOIN tag t ON t.id = dt.tag_id + WHERE dt.document_id = NEW.id + ), '')), 'D') || + setweight(to_tsvector('german', coalesce(NEW.meta_location, '')), 'D'); + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER trg_documents_fts + BEFORE INSERT OR UPDATE ON documents + FOR EACH ROW EXECUTE FUNCTION fn_documents_fts_update(); + +-- 3. Rebuild trigger for join tables and transcription_blocks. +-- These tables don't have a search_vector of their own; instead they +-- touch the parent document row ("SET title = title") to re-fire the +-- BEFORE UPDATE trigger above, which then recomputes the vector with +-- the current state of all joined tables. 
+CREATE OR REPLACE FUNCTION fn_rebuild_document_fts() RETURNS trigger AS $$ +DECLARE + v_doc_id UUID; +BEGIN + v_doc_id := CASE WHEN TG_OP = 'DELETE' THEN OLD.document_id ELSE NEW.document_id END; + UPDATE documents SET title = title WHERE id = v_doc_id; + RETURN NULL; +END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER trg_transcription_blocks_fts + AFTER INSERT OR UPDATE OR DELETE ON transcription_blocks + FOR EACH ROW EXECUTE FUNCTION fn_rebuild_document_fts(); + +CREATE TRIGGER trg_document_receivers_fts + AFTER INSERT OR DELETE ON document_receivers + FOR EACH ROW EXECUTE FUNCTION fn_rebuild_document_fts(); + +CREATE TRIGGER trg_document_tags_fts + AFTER INSERT OR DELETE ON document_tags + FOR EACH ROW EXECUTE FUNCTION fn_rebuild_document_fts(); diff --git a/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentFtsTest.java b/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentFtsTest.java new file mode 100644 index 00000000..3c634e1e --- /dev/null +++ b/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentFtsTest.java @@ -0,0 +1,244 @@ +package org.raddatz.familienarchiv.repository; + +import jakarta.persistence.EntityManager; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.raddatz.familienarchiv.PostgresContainerConfig; +import org.raddatz.familienarchiv.config.FlywayConfig; +import org.raddatz.familienarchiv.model.Document; +import org.raddatz.familienarchiv.model.DocumentAnnotation; +import org.raddatz.familienarchiv.model.DocumentStatus; +import org.raddatz.familienarchiv.model.Person; +import org.raddatz.familienarchiv.model.Tag; +import org.raddatz.familienarchiv.model.TranscriptionBlock; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.data.jpa.test.autoconfigure.DataJpaTest; +import org.springframework.boot.jdbc.test.autoconfigure.AutoConfigureTestDatabase; +import org.springframework.context.annotation.Import; + +import 
java.util.List; +import java.util.Set; +import java.util.UUID; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatNoException; + +@DataJpaTest +@AutoConfigureTestDatabase(replace = AutoConfigureTestDatabase.Replace.NONE) +@Import({PostgresContainerConfig.class, FlywayConfig.class}) +class DocumentFtsTest { + + @Autowired DocumentRepository documentRepository; + @Autowired PersonRepository personRepository; + @Autowired TagRepository tagRepository; + @Autowired AnnotationRepository annotationRepository; + @Autowired TranscriptionBlockRepository blockRepository; + @Autowired EntityManager em; + + @BeforeEach + void setUp() { + blockRepository.deleteAll(); + documentRepository.deleteAll(); + personRepository.deleteAll(); + tagRepository.deleteAll(); + } + + // ─── Guard ───────────────────────────────────────────────────────────────── + + @Test + void german_text_search_config_is_available() { + Number count = (Number) em + .createNativeQuery("SELECT count(*) FROM pg_ts_config WHERE cfgname = 'german'") + .getSingleResult(); + assertThat(count.longValue()).isEqualTo(1L); + } + + // ─── Basic FTS ───────────────────────────────────────────────────────────── + + @Test + void should_find_document_by_exact_title_word() { + documentRepository.saveAndFlush(document("Alter Brief")); + em.clear(); + + List ids = documentRepository.findRankedIdsByFts("Brief"); + + assertThat(ids).hasSize(1); + } + + @Test + void should_find_document_by_stemmed_inflected_form() { + documentRepository.saveAndFlush(document("Alter Brief")); + em.clear(); + + List ids = documentRepository.findRankedIdsByFts("Briefe"); + + assertThat(ids).hasSize(1); + } + + @Test + void should_not_find_document_when_term_absent() { + documentRepository.saveAndFlush(document("Familienfoto")); + em.clear(); + + List ids = documentRepository.findRankedIdsByFts("Brief"); + + assertThat(ids).isEmpty(); + } + + // ─── Transcription blocks 
─────────────────────────────────────────────────── + + @Test + void should_find_document_by_transcription_block_text() { + Document doc = documentRepository.saveAndFlush(document("Foto ohne Text")); + UUID annotationId = annotation(doc.getId()); + + blockRepository.saveAndFlush(block(doc.getId(), annotationId, "Liebe Anna ich schreibe dir aus dem Krieg", 0)); + em.flush(); + em.clear(); + + List ids = documentRepository.findRankedIdsByFts("schreiben"); + + assertThat(ids).contains(doc.getId()); + } + + @Test + void should_rebuild_vector_when_transcription_block_inserted_after_document() { + Document doc = documentRepository.saveAndFlush(document("Leeres Dokument")); + em.clear(); + + assertThat(documentRepository.findRankedIdsByFts("Grundbuch")).isEmpty(); + + UUID annotationId = annotation(doc.getId()); + blockRepository.saveAndFlush(block(doc.getId(), annotationId, "Grundbuch Eintrag 1923", 0)); + em.flush(); + em.clear(); + + assertThat(documentRepository.findRankedIdsByFts("Grundbuch")).contains(doc.getId()); + } + + @Test + void should_rebuild_vector_when_transcription_block_deleted() { + Document doc = documentRepository.saveAndFlush(document("Dokument mit Block")); + UUID annotationId = annotation(doc.getId()); + TranscriptionBlock block = blockRepository.saveAndFlush( + block(doc.getId(), annotationId, "Grundbuch Eintrag 1923", 0)); + em.flush(); + em.clear(); + + assertThat(documentRepository.findRankedIdsByFts("Grundbuch")).contains(doc.getId()); + + blockRepository.deleteById(block.getId()); + em.flush(); + em.clear(); + + assertThat(documentRepository.findRankedIdsByFts("Grundbuch")).doesNotContain(doc.getId()); + } + + // ─── Ranking ─────────────────────────────────────────────────────────────── + + @Test + void should_rank_title_match_above_transcription_match() { + // docA: "Grundbuch" only in title (weight A) + // docB: "Grundbuch" only in transcription block (weight B) + Document docA = documentRepository.saveAndFlush(document("Grundbuch 1923")); 
+ Document docB = documentRepository.saveAndFlush(document("Anderes Dokument")); + UUID annotationId = annotation(docB.getId()); + blockRepository.saveAndFlush(block(docB.getId(), annotationId, "Grundbuch steht darin", 0)); + em.flush(); + em.clear(); + + List ids = documentRepository.findRankedIdsByFts("Grundbuch"); + + assertThat(ids).hasSize(2); + assertThat(ids.get(0)).isEqualTo(docA.getId()); + } + + // ─── Edge cases ──────────────────────────────────────────────────────────── + + @Test + void should_return_empty_when_query_contains_only_stop_words() { + documentRepository.saveAndFlush(document("Ein Brief von der Oma")); + em.clear(); + + List ids = documentRepository.findRankedIdsByFts("der die das und"); + + assertThat(ids).isEmpty(); + } + + @Test + void should_not_throw_when_query_contains_invalid_tsquery_syntax() { + documentRepository.saveAndFlush(document("Brief")); + em.clear(); + + assertThatNoException().isThrownBy(() -> documentRepository.findRankedIdsByFts("(((")); + } + + // ─── Weight C: sender/receiver names ─────────────────────────────────────── + + @Test + void should_find_document_by_receiver_name() { + Person receiver = personRepository.saveAndFlush( + Person.builder().firstName("Anna").lastName("Schmidt").build()); + Document doc = documentRepository.saveAndFlush(Document.builder() + .title("Brief") + .originalFilename("brief.pdf") + .status(DocumentStatus.UPLOADED) + .receivers(Set.of(receiver)) + .build()); + em.flush(); + em.clear(); + + List ids = documentRepository.findRankedIdsByFts("Schmidt"); + + assertThat(ids).contains(doc.getId()); + } + + // ─── Weight D: tag names ─────────────────────────────────────────────────── + + @Test + void should_find_document_by_tag_name() { + Tag tag = tagRepository.saveAndFlush(Tag.builder().name("Familiengeschichte").build()); + documentRepository.saveAndFlush(Document.builder() + .title("Dokument") + .originalFilename("dokument.pdf") + .status(DocumentStatus.UPLOADED) + .tags(Set.of(tag)) + 
.build()); + em.flush(); + em.clear(); + + List ids = documentRepository.findRankedIdsByFts("Familiengeschichte"); + + assertThat(ids).hasSize(1); + } + + // ─── Helpers ─────────────────────────────────────────────────────────────── + + private Document document(String title) { + return Document.builder() + .title(title) + .originalFilename(title.replace(" ", "_") + ".pdf") + .status(DocumentStatus.UPLOADED) + .build(); + } + + private UUID annotation(UUID documentId) { + DocumentAnnotation ann = annotationRepository.save(DocumentAnnotation.builder() + .documentId(documentId) + .pageNumber(1) + .x(0.1).y(0.2).width(0.3).height(0.4) + .color("#00C7B1") + .build()); + em.flush(); + return ann.getId(); + } + + private TranscriptionBlock block(UUID documentId, UUID annotationId, String text, int order) { + return TranscriptionBlock.builder() + .documentId(documentId) + .annotationId(annotationId) + .text(text) + .sortOrder(order) + .build(); + } +} -- 2.49.1 From 7d456d8e8b284206f55391e36694e0b1f95f2e36 Mon Sep 17 00:00:00 2001 From: Marcel Date: Tue, 14 Apr 2026 23:46:24 +0200 Subject: [PATCH 2/8] feat(fts): replace ILIKE hasText with FTS two-phase search and RELEVANCE sort MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - DocumentSort: add RELEVANCE enum value - DocumentSpecifications: remove hasText() ILIKE, add hasIds(List) for FTS-pre-filtered ID sets - DocumentService.searchDocuments(): FTS two-phase path — findRankedIdsByFts() returns ranked UUIDs, hasIds() narrows subsequent Specification query, in-memory re-sort preserves rank order; RELEVANCE is the default when text is present and no explicit non-relevance sort is requested - DocumentSpecificationsTest: remove hasText() tests (Specification removed) Co-Authored-By: Claude Sonnet 4.6 --- .../familienarchiv/dto/DocumentSort.java | 2 +- .../repository/DocumentSpecifications.java | 69 +----------- .../service/DocumentService.java | 29 ++++- 
.../DocumentSpecificationsTest.java | 105 ------------------ 4 files changed, 31 insertions(+), 174 deletions(-) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/dto/DocumentSort.java b/backend/src/main/java/org/raddatz/familienarchiv/dto/DocumentSort.java index 1a276d7f..4c7c1878 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/dto/DocumentSort.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/dto/DocumentSort.java @@ -1,5 +1,5 @@ package org.raddatz.familienarchiv.dto; public enum DocumentSort { - DATE, TITLE, SENDER, RECEIVER, UPLOAD_DATE + DATE, TITLE, SENDER, RECEIVER, UPLOAD_DATE, RELEVANCE } diff --git a/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentSpecifications.java b/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentSpecifications.java index d8d572bc..4ce5cb63 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentSpecifications.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentSpecifications.java @@ -8,78 +8,17 @@ import java.util.UUID; import org.raddatz.familienarchiv.model.Document; import org.raddatz.familienarchiv.model.DocumentStatus; -import org.raddatz.familienarchiv.model.Person; -import org.raddatz.familienarchiv.model.PersonNameAlias; import org.raddatz.familienarchiv.model.Tag; import org.springframework.data.jpa.domain.Specification; import org.springframework.util.StringUtils; public class DocumentSpecifications { - // Filtert nach Text (in Titel, Dateiname, Transkription, Ort, Absender- und Empfängername, Tags) - public static Specification hasText(String text) { + // Filtert nach einer vorberechneten ID-Liste (aus FTS-Abfrage) + public static Specification hasIds(List ids) { return (root, query, cb) -> { - if (!StringUtils.hasText(text)) - return null; - String likePattern = "%" + text.toLowerCase() + "%"; - - // LEFT JOIN on sender (ManyToOne — no duplicate rows) - Join senderJoin = root.join("sender", 
JoinType.LEFT); - - // LEFT JOIN sender → aliases (entity-graph navigation avoids a separate DB - // roundtrip while respecting domain boundaries — the alias table is part of - // the Person aggregate, navigated via @OneToMany, not via a cross-domain - // repository call from DocumentService) - Join senderAliasJoin = senderJoin.join("nameAliases", JoinType.LEFT); - - // EXISTS subquery for receiver name — avoids duplicate rows for multi-receiver docs - Subquery receiverSub = query.subquery(Long.class); - Root receiverRoot = receiverSub.from(Document.class); - Join receiverJoin = receiverRoot.join("receivers"); - receiverSub.select(cb.literal(1L)) - .where( - cb.equal(receiverRoot.get("id"), root.get("id")), - cb.or( - cb.like(cb.lower(receiverJoin.get("lastName")), likePattern), - cb.like(cb.lower(cb.coalesce(receiverJoin.get("firstName"), "")), likePattern) - ) - ); - - // EXISTS subquery for receiver alias name - Subquery receiverAliasSub = query.subquery(Long.class); - Root receiverAliasRoot = receiverAliasSub.from(Document.class); - Join recAliasPersonJoin = receiverAliasRoot.join("receivers"); - Join recAliasJoin = recAliasPersonJoin.join("nameAliases"); - receiverAliasSub.select(cb.literal(1L)) - .where( - cb.equal(receiverAliasRoot.get("id"), root.get("id")), - cb.like(cb.lower(recAliasJoin.get("lastName")), likePattern) - ); - - // EXISTS subquery for tag name — avoids duplicate rows for multi-tag docs - Subquery tagSub = query.subquery(Long.class); - Root tagRoot = tagSub.from(Document.class); - Join tagJoin = tagRoot.join("tags"); - tagSub.select(cb.literal(1L)) - .where( - cb.equal(tagRoot.get("id"), root.get("id")), - cb.like(cb.lower(tagJoin.get("name")), likePattern) - ); - - query.distinct(true); - - return cb.or( - cb.like(cb.lower(root.get("title")), likePattern), - cb.like(cb.lower(root.get("originalFilename")), likePattern), - cb.like(cb.lower(root.get("transcription")), likePattern), - cb.like(cb.lower(root.get("location")), likePattern), - 
cb.like(cb.lower(senderJoin.get("lastName")), likePattern), - cb.like(cb.lower(cb.coalesce(senderJoin.get("firstName"), "")), likePattern), - cb.like(cb.lower(senderAliasJoin.get("lastName")), likePattern), - cb.exists(receiverSub), - cb.exists(receiverAliasSub), - cb.exists(tagSub) - ); + if (ids == null || ids.isEmpty()) return cb.disjunction(); + return root.get("id").in(ids); }; } diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java index db2d6db6..caf2e280 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java @@ -20,6 +20,7 @@ import org.raddatz.familienarchiv.exception.DomainException; import org.raddatz.familienarchiv.exception.ErrorCode; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; +import org.springframework.util.StringUtils; import org.springframework.web.multipart.MultipartFile; import java.io.IOException; @@ -290,7 +291,16 @@ public class DocumentService { // 1. Allgemeine Suche (für das Suchfeld im Frontend) public List searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir) { - Specification spec = Specification.where(hasText(text)) + boolean hasText = StringUtils.hasText(text); + List rankedIds = null; + + if (hasText) { + rankedIds = documentRepository.findRankedIdsByFts(text); + if (rankedIds.isEmpty()) return List.of(); + } + + Specification textSpec = hasText ? 
hasIds(rankedIds) : (root, query, cb) -> null; + Specification spec = Specification.where(textSpec) .and(isBetween(from, to)) .and(hasSender(sender)) .and(hasReceiver(receiver)) @@ -300,7 +310,6 @@ public class DocumentService { // SENDER and RECEIVER are sorted in-memory because JPA's Sort.by("sender.lastName") // generates an INNER JOIN that silently drops documents with null sender/receivers. - // TODO: replace with a native @Query using ORDER BY ... NULLS LAST when pagination is added. if (sort == DocumentSort.RECEIVER) { List results = documentRepository.findAll(spec); return sortByFirstReceiver(results, dir); @@ -309,13 +318,27 @@ public class DocumentService { List results = documentRepository.findAll(spec); return sortBySender(results, dir); } + + // RELEVANCE: default when text present and no explicit non-relevance sort requested + boolean useRankOrder = hasText && (sort == null || sort == DocumentSort.RELEVANCE || sort == DocumentSort.DATE); + if (useRankOrder) { + List results = documentRepository.findAll(spec); + final List ids = rankedIds; + return results.stream() + .sorted(Comparator.comparingInt(doc -> { + int idx = ids.indexOf(doc.getId()); + return idx < 0 ? Integer.MAX_VALUE : idx; + })) + .toList(); + } + Sort springSort = resolveSort(sort, dir); return documentRepository.findAll(spec, springSort); } private Sort resolveSort(DocumentSort sort, String dir) { Sort.Direction direction = "ASC".equalsIgnoreCase(dir) ? 
Sort.Direction.ASC : Sort.Direction.DESC; - if (sort == null || sort == DocumentSort.DATE) { + if (sort == null || sort == DocumentSort.DATE || sort == DocumentSort.RELEVANCE) { return Sort.by(direction, "documentDate"); } // SENDER and RECEIVER are sorted in-memory before this method is called diff --git a/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentSpecificationsTest.java b/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentSpecificationsTest.java index b13b71fe..80ca4c08 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentSpecificationsTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentSpecificationsTest.java @@ -7,8 +7,6 @@ import org.raddatz.familienarchiv.config.FlywayConfig; import org.raddatz.familienarchiv.model.Document; import org.raddatz.familienarchiv.model.DocumentStatus; import org.raddatz.familienarchiv.model.Person; -import org.raddatz.familienarchiv.model.PersonNameAlias; -import org.raddatz.familienarchiv.model.PersonNameAliasType; import org.raddatz.familienarchiv.model.Tag; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.jdbc.test.autoconfigure.AutoConfigureTestDatabase; @@ -30,7 +28,6 @@ class DocumentSpecificationsTest { @Autowired DocumentRepository documentRepository; @Autowired PersonRepository personRepository; - @Autowired PersonNameAliasRepository aliasRepository; @Autowired TagRepository tagRepository; private Person sender; @@ -79,56 +76,6 @@ class DocumentSpecificationsTest { .build()); } - // ─── hasText ────────────────────────────────────────────────────────────── - - @Test - void hasText_returnsAllDocuments_whenTextIsNull() { - List result = documentRepository.findAll(Specification.where(hasText(null))); - assertThat(result).hasSize(3); - } - - @Test - void hasText_returnsAllDocuments_whenTextIsBlank() { - List result = documentRepository.findAll(Specification.where(hasText(" "))); 
- assertThat(result).hasSize(3); - } - - @Test - void hasText_filtersOnTitle() { - List result = documentRepository.findAll(Specification.where(hasText("familienfoto"))); - assertThat(result).extracting(Document::getTitle).containsExactly("Familienfoto"); - } - - @Test - void hasText_filtersOnOriginalFilename() { - List result = documentRepository.findAll(Specification.where(hasText("brief_late"))); - assertThat(result).extracting(Document::getTitle).containsExactly("Neuerer Brief"); - } - - @Test - void hasText_filtersOnTranscription() { - List result = documentRepository.findAll(Specification.where(hasText("schreibe dir"))); - assertThat(result).extracting(Document::getTitle).containsExactly("Alter Brief"); - } - - @Test - void hasText_filtersOnLocation() { - List result = documentRepository.findAll(Specification.where(hasText("berlin"))); - assertThat(result).extracting(Document::getTitle).containsExactly("Alter Brief"); - } - - @Test - void hasText_isCaseInsensitive() { - List result = documentRepository.findAll(Specification.where(hasText("BRIEF"))); - assertThat(result).extracting(Document::getTitle).containsExactlyInAnyOrder("Alter Brief", "Neuerer Brief"); - } - - @Test - void hasText_returnsEmpty_whenNoMatch() { - List result = documentRepository.findAll(Specification.where(hasText("xyznotexist"))); - assertThat(result).isEmpty(); - } - // ─── hasSender ──────────────────────────────────────────────────────────── @Test @@ -253,36 +200,6 @@ class DocumentSpecificationsTest { assertThat(result).isEmpty(); } - @Test - void hasText_findsByPartialSenderLastName() { - List result = documentRepository.findAll(Specification.where(hasText("üller"))); - assertThat(result).extracting(Document::getTitle) - .containsExactlyInAnyOrder("Alter Brief", "Neuerer Brief"); - } - - @Test - void hasText_findsByPartialReceiverLastName() { - List result = documentRepository.findAll(Specification.where(hasText("schmid"))); - 
assertThat(result).extracting(Document::getTitle).containsExactly("Alter Brief"); - } - - @Test - void hasText_findsByPartialTagName() { - List result = documentRepository.findAll(Specification.where(hasText("amili"))); - assertThat(result).extracting(Document::getTitle) - .containsExactlyInAnyOrder("Alter Brief", "Familienfoto"); - } - - @Test - void hasText_doesNotProduceDuplicatesForDocumentWithMultipleReceivers() { - Person receiver2 = personRepository.save(Person.builder().firstName("Karl").lastName("Schmidt").build()); - briefEarly.setReceivers(new java.util.HashSet<>(Set.of(receiver, receiver2))); - documentRepository.save(briefEarly); - - List result = documentRepository.findAll(Specification.where(hasText("schmid"))); - assertThat(result).hasSize(1); - } - // ─── hasTagPartial ──────────────────────────────────────────────────────── @Test @@ -329,26 +246,4 @@ class DocumentSpecificationsTest { assertThat(result).isEmpty(); } - // ─── hasText with aliases ──────────────────────────────────────────────── - - @Test - void hasText_findsDocumentBySenderAliasLastName() { - aliasRepository.save(PersonNameAlias.builder() - .person(sender).lastName("von Mueller").type(PersonNameAliasType.BIRTH).sortOrder(0).build()); - - List result = documentRepository.findAll(Specification.where(hasText("von Mueller"))); - - assertThat(result).isNotEmpty(); - assertThat(result).extracting(Document::getTitle).contains("Alter Brief"); - } - - @Test - void hasText_findsDocumentByReceiverAliasLastName() { - aliasRepository.save(PersonNameAlias.builder() - .person(receiver).lastName("de Gruyter").type(PersonNameAliasType.BIRTH).sortOrder(0).build()); - - List result = documentRepository.findAll(Specification.where(hasText("de Gruyter"))); - - assertThat(result).isNotEmpty(); - } } -- 2.49.1 From 7ec3e6170d15a0d3dd7cb3b39a2f360f3bd80af0 Mon Sep 17 00:00:00 2001 From: Marcel Date: Tue, 14 Apr 2026 23:47:45 +0200 Subject: [PATCH 3/8] feat(fts): backfill search_vector for all existing 
documents (V35) Fires the BEFORE UPDATE trigger for every documents row, which recomputes the tsvector from all currently-linked metadata, blocks, receivers, and tags. Co-Authored-By: Claude Sonnet 4.6 --- .../db/migration/V35__backfill_fts_search_vector.sql | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 backend/src/main/resources/db/migration/V35__backfill_fts_search_vector.sql diff --git a/backend/src/main/resources/db/migration/V35__backfill_fts_search_vector.sql b/backend/src/main/resources/db/migration/V35__backfill_fts_search_vector.sql new file mode 100644 index 00000000..a8f15f9b --- /dev/null +++ b/backend/src/main/resources/db/migration/V35__backfill_fts_search_vector.sql @@ -0,0 +1,6 @@ +-- Backfill search_vector for all existing documents. +-- The BEFORE UPDATE trigger (trg_documents_fts, installed by V34) recomputes +-- the full vector from title, summary, transcription blocks, sender/receiver +-- names, tags, and location. At backfill time all join tables already contain +-- data, so "SET title = title" is sufficient to fire the trigger for every row. +UPDATE documents SET title = title; -- 2.49.1 From 947d8aeb6c2e4ccb06c182bb4648bec549722612 Mon Sep 17 00:00:00 2001 From: Marcel Date: Wed, 15 Apr 2026 10:57:24 +0200 Subject: [PATCH 4/8] =?UTF-8?q?fix(search):=20respect=20DATE=20sort=20when?= =?UTF-8?q?=20text=20is=20present=20=E2=80=94=20do=20not=20override=20with?= =?UTF-8?q?=20relevance?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a user explicitly selects DATE sort with a text query active, the previous code treated it identically to RELEVANCE, silently discarding the user's sort choice. Remove DATE from the useRankOrder condition so that explicit DATE sort always goes through the standard JPA sort path. 
Co-Authored-By: Claude Sonnet 4.6 --- .../service/DocumentService.java | 4 +- .../service/DocumentServiceSortTest.java | 100 ++++++++++++++++++ 2 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceSortTest.java diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java index caf2e280..c0747cc4 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java @@ -319,8 +319,8 @@ public class DocumentService { return sortBySender(results, dir); } - // RELEVANCE: default when text present and no explicit non-relevance sort requested - boolean useRankOrder = hasText && (sort == null || sort == DocumentSort.RELEVANCE || sort == DocumentSort.DATE); + // RELEVANCE: default when text present and no explicit sort given + boolean useRankOrder = hasText && (sort == null || sort == DocumentSort.RELEVANCE); if (useRankOrder) { List results = documentRepository.findAll(spec); final List ids = rankedIds; diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceSortTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceSortTest.java new file mode 100644 index 00000000..44a7a51e --- /dev/null +++ b/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceSortTest.java @@ -0,0 +1,100 @@ +package org.raddatz.familienarchiv.service; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.raddatz.familienarchiv.dto.DocumentSort; +import org.raddatz.familienarchiv.model.Document; +import org.raddatz.familienarchiv.model.DocumentStatus; +import 
org.raddatz.familienarchiv.repository.DocumentRepository; +import org.springframework.data.domain.Sort; +import org.springframework.data.jpa.domain.Specification; + +import java.time.LocalDate; +import java.util.List; +import java.util.UUID; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.when; + +@ExtendWith(MockitoExtension.class) +class DocumentServiceSortTest { + + @Mock DocumentRepository documentRepository; + @Mock PersonService personService; + @Mock FileService fileService; + @Mock TagService tagService; + @Mock DocumentVersionService documentVersionService; + @Mock AnnotationService annotationService; + @InjectMocks DocumentService documentService; + + // ─── searchDocuments — DATE sort ────────────────────────────────────────── + + @Test + void searchDocuments_with_DATE_sort_and_text_sorts_chronologically_not_by_relevance() { + UUID id1 = UUID.randomUUID(); // rank position 0 (higher relevance, older doc) + UUID id2 = UUID.randomUUID(); // rank position 1 (lower relevance, newer doc) + + Document older = Document.builder().id(id1) + .title("Brief").status(DocumentStatus.UPLOADED) + .documentDate(LocalDate.of(1940, 1, 1)).build(); + Document newer = Document.builder().id(id2) + .title("Brief").status(DocumentStatus.UPLOADED) + .documentDate(LocalDate.of(1960, 1, 1)).build(); + + // FTS returns id1 first (higher rank), id2 second + when(documentRepository.findRankedIdsByFts("Brief")).thenReturn(List.of(id1, id2)); + // findAll(spec, sort) — the correct date path — returns date-DESC order + when(documentRepository.findAll(any(Specification.class), any(Sort.class))) + .thenReturn(List.of(newer, older)); + + List result = documentService.searchDocuments( + "Brief", null, null, null, null, null, null, null, DocumentSort.DATE, "DESC"); + + // Expect: date order (newer 1960 first), NOT rank order (older 1940 first) + assertThat(result).hasSize(2); + 
assertThat(result.get(0).getId()).isEqualTo(id2); // newer doc first + } + + // ─── searchDocuments — RELEVANCE sort ───────────────────────────────────── + + @Test + void searchDocuments_with_RELEVANCE_sort_and_text_preserves_fts_rank_order() { + UUID id1 = UUID.randomUUID(); // rank position 0 + UUID id2 = UUID.randomUUID(); // rank position 1 + + Document doc1 = Document.builder().id(id1).title("Brief").status(DocumentStatus.UPLOADED).build(); + Document doc2 = Document.builder().id(id2).title("Brief").status(DocumentStatus.UPLOADED).build(); + + when(documentRepository.findRankedIdsByFts("Brief")).thenReturn(List.of(id1, id2)); + when(documentRepository.findAll(any(Specification.class))) + .thenReturn(List.of(doc2, doc1)); // unordered from DB + + List<Document> result = documentService.searchDocuments( + "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null); + + // Expect: rank order restored (id1 first) + assertThat(result.get(0).getId()).isEqualTo(id1); + } + + @Test + void searchDocuments_with_null_sort_and_text_defaults_to_fts_rank_order() { + UUID id1 = UUID.randomUUID(); + UUID id2 = UUID.randomUUID(); + + Document doc1 = Document.builder().id(id1).title("Brief").status(DocumentStatus.UPLOADED).build(); + Document doc2 = Document.builder().id(id2).title("Brief").status(DocumentStatus.UPLOADED).build(); + + when(documentRepository.findRankedIdsByFts("Brief")).thenReturn(List.of(id1, id2)); + when(documentRepository.findAll(any(Specification.class))) + .thenReturn(List.of(doc2, doc1)); + + List<Document> result = documentService.searchDocuments( + "Brief", null, null, null, null, null, null, null, null, null); + + assertThat(result.get(0).getId()).isEqualTo(id1); + } +} -- 2.49.1 From 43595aeb8afa6883ad649fce9cf9360d35b4a1d7 Mon Sep 17 00:00:00 2001 From: Marcel Date: Wed, 15 Apr 2026 10:59:05 +0200 Subject: [PATCH 5/8] =?UTF-8?q?refactor(search):=20replace=20O(n=C2=B2)=20?= =?UTF-8?q?indexOf=20with=20HashMap=20for=20rank=20ordering?= MIME-Version:
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ids.indexOf() scans the full list for each document, giving O(n²) total. Build a Map once at O(n) and use getOrDefault at O(1) per document. Behavior is identical; existing tests remain green. Co-Authored-By: Claude Sonnet 4.6 --- .../raddatz/familienarchiv/service/DocumentService.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java index c0747cc4..46d4a4de 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java @@ -323,12 +323,11 @@ public class DocumentService { boolean useRankOrder = hasText && (sort == null || sort == DocumentSort.RELEVANCE); if (useRankOrder) { List<Document> results = documentRepository.findAll(spec); - final List<UUID> ids = rankedIds; + Map<UUID, Integer> rankMap = new HashMap<>(); + for (int i = 0; i < rankedIds.size(); i++) rankMap.put(rankedIds.get(i), i); return results.stream() - .sorted(Comparator.comparingInt(doc -> { - int idx = ids.indexOf(doc.getId()); - return idx < 0 ?
Integer.MAX_VALUE : idx; - })) + .sorted(Comparator.comparingInt( + doc -> rankMap.getOrDefault(doc.getId(), Integer.MAX_VALUE))) .toList(); } -- 2.49.1 From 305f95a5729e49a0b6fb156ededcdc45cb904021 Mon Sep 17 00:00:00 2001 From: Marcel Date: Wed, 15 Apr 2026 11:03:37 +0200 Subject: [PATCH 6/8] test(search): add sender name FTS coverage and combined filter test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - should_find_document_by_sender_name — symmetric with existing receiver test - fts_combined_with_status_filter_excludes_non_matching_status — verifies hasIds(rankedIds).and(hasStatus(...)) two-phase search works together Co-Authored-By: Claude Sonnet 4.6 --- .../repository/DocumentFtsTest.java | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentFtsTest.java b/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentFtsTest.java index 3c634e1e..581cb063 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentFtsTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/repository/DocumentFtsTest.java @@ -16,12 +16,16 @@ import org.springframework.boot.data.jpa.test.autoconfigure.DataJpaTest; import org.springframework.boot.jdbc.test.autoconfigure.AutoConfigureTestDatabase; import org.springframework.context.annotation.Import; +import org.springframework.data.jpa.domain.Specification; + import java.util.List; import java.util.Set; import java.util.UUID; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatNoException; +import static org.raddatz.familienarchiv.repository.DocumentSpecifications.hasIds; +import static org.raddatz.familienarchiv.repository.DocumentSpecifications.hasStatus; @DataJpaTest @AutoConfigureTestDatabase(replace = AutoConfigureTestDatabase.Replace.NONE) @@ -193,6 +197,24 @@ class DocumentFtsTest { 
assertThat(ids).contains(doc.getId()); } + @Test + void should_find_document_by_sender_name() { + Person sender = personRepository.saveAndFlush( + Person.builder().firstName("Walter").lastName("Raddatz").build()); + Document doc = documentRepository.saveAndFlush(Document.builder() + .title("Brief") + .originalFilename("brief.pdf") + .status(DocumentStatus.UPLOADED) + .sender(sender) + .build()); + em.flush(); + em.clear(); + + List<UUID> ids = documentRepository.findRankedIdsByFts("Raddatz"); + + assertThat(ids).contains(doc.getId()); + } + // ─── Weight D: tag names ─────────────────────────────────────────────────── @Test @@ -212,6 +234,29 @@ class DocumentFtsTest { assertThat(ids).hasSize(1); } + // ─── Combined FTS + Specification filter ────────────────────────────────── + + @Test + void fts_combined_with_status_filter_excludes_non_matching_status() { + documentRepository.saveAndFlush(document("Grundbuch")); // UPLOADED + documentRepository.saveAndFlush(Document.builder() + .title("Grundbuch") + .originalFilename("grundbuch_ph.pdf") + .status(DocumentStatus.PLACEHOLDER) + .build()); + em.flush(); + em.clear(); + + List<UUID> rankedIds = documentRepository.findRankedIdsByFts("Grundbuch"); + Specification<Document> spec = Specification.where(hasIds(rankedIds)) + .and(hasStatus(DocumentStatus.UPLOADED)); + + List<Document> result = documentRepository.findAll(spec); + + assertThat(result).hasSize(1); + assertThat(result.get(0).getStatus()).isEqualTo(DocumentStatus.UPLOADED); + } + // ─── Helpers ─────────────────────────────────────────────────────────────── private Document document(String title) { -- 2.49.1 From 793e632889bec483098551dfd7b768ec5fc4f640 Mon Sep 17 00:00:00 2001 From: Marcel Date: Wed, 15 Apr 2026 12:16:16 +0200 Subject: [PATCH 7/8] fix(lint): exclude project.inlang/ from Prettier Inlang regenerates .meta.json and README.md on every compilation run. The regenerated files fail Prettier in CI because the tool writes its own formatting, not ours.
Co-Authored-By: Claude Sonnet 4.6 --- frontend/.prettierignore | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/.prettierignore b/frontend/.prettierignore index 4a03d881..458412f8 100644 --- a/frontend/.prettierignore +++ b/frontend/.prettierignore @@ -18,6 +18,7 @@ bun.lockb /src/lib/paraglide/ /src/lib/paraglide_bak*/ /src/paraglide/ +/project.inlang/ # Test artifacts /test-results/ -- 2.49.1 From 4b8e0637ce85a4041d095ca3db146980cbaf51b8 Mon Sep 17 00:00:00 2001 From: Marcel Date: Wed, 15 Apr 2026 12:28:57 +0200 Subject: [PATCH 8/8] fix(ci): pin DOCKER_API_VERSION=1.43 for Testcontainers on NAS runner Testcontainers 2.0.2 (via Spring Boot 4.0) negotiates Docker API 1.44, but the NAS runner has Docker Engine 24.x which caps at 1.43. Forcing the client version down unblocks tests until Docker is upgraded on the NAS. Co-Authored-By: Claude Sonnet 4.6 --- .gitea/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index cc298b26..135a6149 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -52,6 +52,8 @@ jobs: backend-unit-tests: name: Backend Unit Tests runs-on: ubuntu-latest + env: + DOCKER_API_VERSION: "1.43" # NAS runner runs Docker 24.x (max API 1.43); Testcontainers 2.x defaults to 1.44 steps: - uses: actions/checkout@v4 -- 2.49.1