feat(search): upgrade to PostgreSQL full-text search with German stemming #237
@@ -52,6 +52,8 @@ jobs:
|
|||||||
backend-unit-tests:
|
backend-unit-tests:
|
||||||
name: Backend Unit Tests
|
name: Backend Unit Tests
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
env:
|
||||||
|
DOCKER_API_VERSION: "1.43" # NAS runner runs Docker 24.x (max API 1.43); Testcontainers 2.x defaults to 1.44
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
package org.raddatz.familienarchiv.dto;
|
package org.raddatz.familienarchiv.dto;
|
||||||
|
|
||||||
public enum DocumentSort {
|
public enum DocumentSort {
|
||||||
DATE, TITLE, SENDER, RECEIVER, UPLOAD_DATE
|
DATE, TITLE, SENDER, RECEIVER, UPLOAD_DATE, RELEVANCE
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -81,4 +81,12 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
|
|||||||
@Param("to") LocalDate to,
|
@Param("to") LocalDate to,
|
||||||
Sort sort);
|
Sort sort);
|
||||||
|
|
||||||
|
@Query(nativeQuery = true, value = """
|
||||||
|
SELECT d.id FROM documents d
|
||||||
|
WHERE d.search_vector @@ websearch_to_tsquery('german', :query)
|
||||||
|
ORDER BY ts_rank(d.search_vector, websearch_to_tsquery('german', :query)) DESC,
|
||||||
|
d.meta_date DESC NULLS LAST
|
||||||
|
""")
|
||||||
|
List<UUID> findRankedIdsByFts(@Param("query") String query);
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -8,78 +8,17 @@ import java.util.UUID;
|
|||||||
|
|
||||||
import org.raddatz.familienarchiv.model.Document;
|
import org.raddatz.familienarchiv.model.Document;
|
||||||
import org.raddatz.familienarchiv.model.DocumentStatus;
|
import org.raddatz.familienarchiv.model.DocumentStatus;
|
||||||
import org.raddatz.familienarchiv.model.Person;
|
|
||||||
import org.raddatz.familienarchiv.model.PersonNameAlias;
|
|
||||||
import org.raddatz.familienarchiv.model.Tag;
|
import org.raddatz.familienarchiv.model.Tag;
|
||||||
import org.springframework.data.jpa.domain.Specification;
|
import org.springframework.data.jpa.domain.Specification;
|
||||||
import org.springframework.util.StringUtils;
|
import org.springframework.util.StringUtils;
|
||||||
|
|
||||||
public class DocumentSpecifications {
|
public class DocumentSpecifications {
|
||||||
|
|
||||||
// Filtert nach Text (in Titel, Dateiname, Transkription, Ort, Absender- und Empfängername, Tags)
|
// Filtert nach einer vorberechneten ID-Liste (aus FTS-Abfrage)
|
||||||
public static Specification<Document> hasText(String text) {
|
public static Specification<Document> hasIds(List<UUID> ids) {
|
||||||
return (root, query, cb) -> {
|
return (root, query, cb) -> {
|
||||||
if (!StringUtils.hasText(text))
|
if (ids == null || ids.isEmpty()) return cb.disjunction();
|
||||||
return null;
|
return root.get("id").in(ids);
|
||||||
String likePattern = "%" + text.toLowerCase() + "%";
|
|
||||||
|
|
||||||
// LEFT JOIN on sender (ManyToOne — no duplicate rows)
|
|
||||||
Join<Document, Person> senderJoin = root.join("sender", JoinType.LEFT);
|
|
||||||
|
|
||||||
// LEFT JOIN sender → aliases (entity-graph navigation avoids a separate DB
|
|
||||||
// roundtrip while respecting domain boundaries — the alias table is part of
|
|
||||||
// the Person aggregate, navigated via @OneToMany, not via a cross-domain
|
|
||||||
// repository call from DocumentService)
|
|
||||||
Join<Person, PersonNameAlias> senderAliasJoin = senderJoin.join("nameAliases", JoinType.LEFT);
|
|
||||||
|
|
||||||
// EXISTS subquery for receiver name — avoids duplicate rows for multi-receiver docs
|
|
||||||
Subquery<Long> receiverSub = query.subquery(Long.class);
|
|
||||||
Root<Document> receiverRoot = receiverSub.from(Document.class);
|
|
||||||
Join<Document, Person> receiverJoin = receiverRoot.join("receivers");
|
|
||||||
receiverSub.select(cb.literal(1L))
|
|
||||||
.where(
|
|
||||||
cb.equal(receiverRoot.get("id"), root.get("id")),
|
|
||||||
cb.or(
|
|
||||||
cb.like(cb.lower(receiverJoin.get("lastName")), likePattern),
|
|
||||||
cb.like(cb.lower(cb.coalesce(receiverJoin.get("firstName"), "")), likePattern)
|
|
||||||
)
|
|
||||||
);
|
|
||||||
|
|
||||||
// EXISTS subquery for receiver alias name
|
|
||||||
Subquery<Long> receiverAliasSub = query.subquery(Long.class);
|
|
||||||
Root<Document> receiverAliasRoot = receiverAliasSub.from(Document.class);
|
|
||||||
Join<Document, Person> recAliasPersonJoin = receiverAliasRoot.join("receivers");
|
|
||||||
Join<Person, PersonNameAlias> recAliasJoin = recAliasPersonJoin.join("nameAliases");
|
|
||||||
receiverAliasSub.select(cb.literal(1L))
|
|
||||||
.where(
|
|
||||||
cb.equal(receiverAliasRoot.get("id"), root.get("id")),
|
|
||||||
cb.like(cb.lower(recAliasJoin.get("lastName")), likePattern)
|
|
||||||
);
|
|
||||||
|
|
||||||
// EXISTS subquery for tag name — avoids duplicate rows for multi-tag docs
|
|
||||||
Subquery<Long> tagSub = query.subquery(Long.class);
|
|
||||||
Root<Document> tagRoot = tagSub.from(Document.class);
|
|
||||||
Join<Document, Tag> tagJoin = tagRoot.join("tags");
|
|
||||||
tagSub.select(cb.literal(1L))
|
|
||||||
.where(
|
|
||||||
cb.equal(tagRoot.get("id"), root.get("id")),
|
|
||||||
cb.like(cb.lower(tagJoin.get("name")), likePattern)
|
|
||||||
);
|
|
||||||
|
|
||||||
query.distinct(true);
|
|
||||||
|
|
||||||
return cb.or(
|
|
||||||
cb.like(cb.lower(root.get("title")), likePattern),
|
|
||||||
cb.like(cb.lower(root.get("originalFilename")), likePattern),
|
|
||||||
cb.like(cb.lower(root.get("transcription")), likePattern),
|
|
||||||
cb.like(cb.lower(root.get("location")), likePattern),
|
|
||||||
cb.like(cb.lower(senderJoin.get("lastName")), likePattern),
|
|
||||||
cb.like(cb.lower(cb.coalesce(senderJoin.get("firstName"), "")), likePattern),
|
|
||||||
cb.like(cb.lower(senderAliasJoin.get("lastName")), likePattern),
|
|
||||||
cb.exists(receiverSub),
|
|
||||||
cb.exists(receiverAliasSub),
|
|
||||||
cb.exists(tagSub)
|
|
||||||
);
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import org.raddatz.familienarchiv.exception.DomainException;
|
|||||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import org.springframework.transaction.annotation.Transactional;
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
import org.springframework.util.StringUtils;
|
||||||
import org.springframework.web.multipart.MultipartFile;
|
import org.springframework.web.multipart.MultipartFile;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@@ -290,7 +291,16 @@ public class DocumentService {
|
|||||||
|
|
||||||
// 1. Allgemeine Suche (für das Suchfeld im Frontend)
|
// 1. Allgemeine Suche (für das Suchfeld im Frontend)
|
||||||
public List<Document> searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir) {
|
public List<Document> searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir) {
|
||||||
Specification<Document> spec = Specification.where(hasText(text))
|
boolean hasText = StringUtils.hasText(text);
|
||||||
|
List<UUID> rankedIds = null;
|
||||||
|
|
||||||
|
if (hasText) {
|
||||||
|
rankedIds = documentRepository.findRankedIdsByFts(text);
|
||||||
|
if (rankedIds.isEmpty()) return List.of();
|
||||||
|
}
|
||||||
|
|
||||||
|
Specification<Document> textSpec = hasText ? hasIds(rankedIds) : (root, query, cb) -> null;
|
||||||
|
Specification<Document> spec = Specification.where(textSpec)
|
||||||
.and(isBetween(from, to))
|
.and(isBetween(from, to))
|
||||||
.and(hasSender(sender))
|
.and(hasSender(sender))
|
||||||
.and(hasReceiver(receiver))
|
.and(hasReceiver(receiver))
|
||||||
@@ -300,7 +310,6 @@ public class DocumentService {
|
|||||||
|
|
||||||
// SENDER and RECEIVER are sorted in-memory because JPA's Sort.by("sender.lastName")
|
// SENDER and RECEIVER are sorted in-memory because JPA's Sort.by("sender.lastName")
|
||||||
// generates an INNER JOIN that silently drops documents with null sender/receivers.
|
// generates an INNER JOIN that silently drops documents with null sender/receivers.
|
||||||
// TODO: replace with a native @Query using ORDER BY ... NULLS LAST when pagination is added.
|
|
||||||
if (sort == DocumentSort.RECEIVER) {
|
if (sort == DocumentSort.RECEIVER) {
|
||||||
List<Document> results = documentRepository.findAll(spec);
|
List<Document> results = documentRepository.findAll(spec);
|
||||||
return sortByFirstReceiver(results, dir);
|
return sortByFirstReceiver(results, dir);
|
||||||
@@ -309,13 +318,26 @@ public class DocumentService {
|
|||||||
List<Document> results = documentRepository.findAll(spec);
|
List<Document> results = documentRepository.findAll(spec);
|
||||||
return sortBySender(results, dir);
|
return sortBySender(results, dir);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RELEVANCE: default when text present and no explicit sort given
|
||||||
|
boolean useRankOrder = hasText && (sort == null || sort == DocumentSort.RELEVANCE);
|
||||||
|
if (useRankOrder) {
|
||||||
|
List<Document> results = documentRepository.findAll(spec);
|
||||||
|
Map<UUID, Integer> rankMap = new HashMap<>();
|
||||||
|
for (int i = 0; i < rankedIds.size(); i++) rankMap.put(rankedIds.get(i), i);
|
||||||
|
return results.stream()
|
||||||
|
.sorted(Comparator.comparingInt(
|
||||||
|
doc -> rankMap.getOrDefault(doc.getId(), Integer.MAX_VALUE)))
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
|
|
||||||
Sort springSort = resolveSort(sort, dir);
|
Sort springSort = resolveSort(sort, dir);
|
||||||
return documentRepository.findAll(spec, springSort);
|
return documentRepository.findAll(spec, springSort);
|
||||||
}
|
}
|
||||||
|
|
||||||
private Sort resolveSort(DocumentSort sort, String dir) {
|
private Sort resolveSort(DocumentSort sort, String dir) {
|
||||||
Sort.Direction direction = "ASC".equalsIgnoreCase(dir) ? Sort.Direction.ASC : Sort.Direction.DESC;
|
Sort.Direction direction = "ASC".equalsIgnoreCase(dir) ? Sort.Direction.ASC : Sort.Direction.DESC;
|
||||||
if (sort == null || sort == DocumentSort.DATE) {
|
if (sort == null || sort == DocumentSort.DATE || sort == DocumentSort.RELEVANCE) {
|
||||||
return Sort.by(direction, "documentDate");
|
return Sort.by(direction, "documentDate");
|
||||||
}
|
}
|
||||||
// SENDER and RECEIVER are sorted in-memory before this method is called
|
// SENDER and RECEIVER are sorted in-memory before this method is called
|
||||||
|
|||||||
@@ -0,0 +1,74 @@
|
|||||||
|
-- ─── Full-Text Search: search_vector on documents ──────────────────────────────
|
||||||
|
-- Adds a tsvector column that aggregates: title (A), summary + transcription
|
||||||
|
-- block text (B), sender/receiver names (C), tag names + location (D).
|
||||||
|
-- The column is maintained by DB triggers so the OCR pipeline (which writes
|
||||||
|
-- transcription_blocks directly) stays in sync without JPA @PreUpdate hooks.
|
||||||
|
|
||||||
|
-- 1. Column and GIN index
|
||||||
|
-- Nullable tsvector column; its value is written by the trigger function defined in step 2.
ALTER TABLE documents ADD COLUMN search_vector tsvector;
|
||||||
|
-- GIN index on the vector so that "search_vector @@ <tsquery>" lookups can be index-assisted.
CREATE INDEX idx_documents_search ON documents USING GIN (search_vector);
|
||||||
|
|
||||||
|
-- 2. Trigger function: recomputes search_vector for the row being written.
--    It is attached as a BEFORE INSERT OR UPDATE row trigger, so assigning
--    NEW.search_vector is all that is needed; the value is stored with the row.
--
--    Weights: A = title, B = summary and transcription block text,
--             C = sender and receiver names, D = tag names and location.
CREATE OR REPLACE FUNCTION fn_documents_fts_update() RETURNS trigger AS $$
DECLARE
    v_blocks    text;  -- text of all transcription blocks of this document
    v_sender    text;  -- "<first> <last>" of the sender; NULL when there is none
    v_receivers text;  -- "<first> <last>" of every receiver, space separated
    v_tags      text;  -- names of all tags on this document, space separated
BEGIN
    SELECT string_agg(tb.text, ' ') FILTER (WHERE tb.text IS NOT NULL)
      INTO v_blocks
      FROM transcription_blocks tb
     WHERE tb.document_id = NEW.id;

    SELECT coalesce(p.first_name, '') || ' ' || p.last_name
      INTO v_sender
      FROM persons p
     WHERE p.id = NEW.sender_id;

    SELECT string_agg(coalesce(p.first_name, '') || ' ' || p.last_name, ' ')
      INTO v_receivers
      FROM document_receivers dr
      JOIN persons p ON p.id = dr.person_id
     WHERE dr.document_id = NEW.id;

    SELECT string_agg(t.name, ' ')
      INTO v_tags
      FROM document_tags dt
      JOIN tag t ON t.id = dt.tag_id
     WHERE dt.document_id = NEW.id;

    -- Every part is coalesced to '' so a missing value contributes an empty
    -- vector instead of turning the whole concatenation into NULL.
    NEW.search_vector :=
           setweight(to_tsvector('german', coalesce(NEW.title, '')),         'A')
        || setweight(to_tsvector('german', coalesce(NEW.summary, '')),       'B')
        || setweight(to_tsvector('german', coalesce(v_blocks, '')),          'B')
        || setweight(to_tsvector('german', coalesce(v_sender, '')),          'C')
        || setweight(to_tsvector('german', coalesce(v_receivers, '')),       'C')
        || setweight(to_tsvector('german', coalesce(v_tags, '')),            'D')
        || setweight(to_tsvector('german', coalesce(NEW.meta_location, '')), 'D');

    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER trg_documents_fts
    BEFORE INSERT OR UPDATE ON documents
    FOR EACH ROW EXECUTE FUNCTION fn_documents_fts_update();
|
||||||
|
|
||||||
|
-- 3. Rebuild triggers for the tables whose content feeds search_vector.
--    None of these tables has a search_vector of its own. Instead they touch
--    the affected document rows ("SET title = title"), which re-fires the
--    BEFORE UPDATE trigger on documents; that trigger then recomputes the
--    vector from the current state of all joined tables.

-- 3a. Rows that carry a document_id (transcription_blocks and the join tables).
CREATE OR REPLACE FUNCTION fn_rebuild_document_fts() RETURNS trigger AS $$
BEGIN
    -- NEW is only meaningful for INSERT/UPDATE and OLD only for UPDATE/DELETE,
    -- so each operation gets its own branch.
    IF TG_OP = 'INSERT' THEN
        UPDATE documents SET title = title WHERE id = NEW.document_id;
    ELSIF TG_OP = 'DELETE' THEN
        UPDATE documents SET title = title WHERE id = OLD.document_id;
    ELSE
        -- UPDATE: refresh the document the row belongs to now and, if the row
        -- was moved to another document, the one it belonged to before.
        -- Otherwise the previous owner would keep matching the moved text.
        UPDATE documents SET title = title
         WHERE id IN (OLD.document_id, NEW.document_id);
    END IF;
    RETURN NULL;  -- the return value of an AFTER row trigger is ignored
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER trg_transcription_blocks_fts
    AFTER INSERT OR UPDATE OR DELETE ON transcription_blocks
    FOR EACH ROW EXECUTE FUNCTION fn_rebuild_document_fts();

CREATE TRIGGER trg_document_receivers_fts
    AFTER INSERT OR DELETE ON document_receivers
    FOR EACH ROW EXECUTE FUNCTION fn_rebuild_document_fts();

CREATE TRIGGER trg_document_tags_fts
    AFTER INSERT OR DELETE ON document_tags
    FOR EACH ROW EXECUTE FUNCTION fn_rebuild_document_fts();

-- 3b. Person renamed: sender and receiver names are part of the vector
--     (weight C), so every document that references the person is refreshed.
CREATE OR REPLACE FUNCTION fn_rebuild_document_fts_for_person() RETURNS trigger AS $$
BEGIN
    UPDATE documents d SET title = d.title
     WHERE d.sender_id = NEW.id
        OR EXISTS (SELECT 1
                     FROM document_receivers dr
                    WHERE dr.document_id = d.id
                      AND dr.person_id = NEW.id);
    RETURN NULL;
END;
$$ LANGUAGE plpgsql;

-- The WHEN clause skips updates that list the name columns without actually
-- changing them, so unrelated person edits do not rebuild any vectors.
CREATE TRIGGER trg_persons_fts
    AFTER UPDATE OF first_name, last_name ON persons
    FOR EACH ROW
    WHEN (OLD.first_name IS DISTINCT FROM NEW.first_name
          OR OLD.last_name IS DISTINCT FROM NEW.last_name)
    EXECUTE FUNCTION fn_rebuild_document_fts_for_person();

-- 3c. Tag renamed: tag names are part of the vector (weight D).
CREATE OR REPLACE FUNCTION fn_rebuild_document_fts_for_tag() RETURNS trigger AS $$
BEGIN
    UPDATE documents d SET title = d.title
     WHERE EXISTS (SELECT 1
                     FROM document_tags dt
                    WHERE dt.document_id = d.id
                      AND dt.tag_id = NEW.id);
    RETURN NULL;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER trg_tag_fts
    AFTER UPDATE OF name ON tag
    FOR EACH ROW
    WHEN (OLD.name IS DISTINCT FROM NEW.name)
    EXECUTE FUNCTION fn_rebuild_document_fts_for_tag();
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
-- Backfill search_vector for all existing documents.
|
||||||
|
-- The BEFORE UPDATE trigger (trg_documents_fts, installed by V34) recomputes
|
||||||
|
-- the full vector from title, summary, transcription blocks, sender/receiver
|
||||||
|
-- names, tags, and location. At backfill time all join tables already contain
|
||||||
|
-- data, so "SET title = title" is sufficient to fire the trigger for every row.
|
||||||
|
UPDATE documents SET title = title;
|
||||||
@@ -0,0 +1,289 @@
|
|||||||
|
package org.raddatz.familienarchiv.repository;
|
||||||
|
|
||||||
|
import jakarta.persistence.EntityManager;
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.raddatz.familienarchiv.PostgresContainerConfig;
|
||||||
|
import org.raddatz.familienarchiv.config.FlywayConfig;
|
||||||
|
import org.raddatz.familienarchiv.model.Document;
|
||||||
|
import org.raddatz.familienarchiv.model.DocumentAnnotation;
|
||||||
|
import org.raddatz.familienarchiv.model.DocumentStatus;
|
||||||
|
import org.raddatz.familienarchiv.model.Person;
|
||||||
|
import org.raddatz.familienarchiv.model.Tag;
|
||||||
|
import org.raddatz.familienarchiv.model.TranscriptionBlock;
|
||||||
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
|
import org.springframework.boot.data.jpa.test.autoconfigure.DataJpaTest;
|
||||||
|
import org.springframework.boot.jdbc.test.autoconfigure.AutoConfigureTestDatabase;
|
||||||
|
import org.springframework.context.annotation.Import;
|
||||||
|
|
||||||
|
import org.springframework.data.jpa.domain.Specification;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
import static org.assertj.core.api.Assertions.assertThatNoException;
|
||||||
|
import static org.raddatz.familienarchiv.repository.DocumentSpecifications.hasIds;
|
||||||
|
import static org.raddatz.familienarchiv.repository.DocumentSpecifications.hasStatus;
|
||||||
|
|
||||||
|
@DataJpaTest
|
||||||
|
@AutoConfigureTestDatabase(replace = AutoConfigureTestDatabase.Replace.NONE)
|
||||||
|
@Import({PostgresContainerConfig.class, FlywayConfig.class})
|
||||||
|
class DocumentFtsTest {
|
||||||
|
|
||||||
|
@Autowired DocumentRepository documentRepository;
|
||||||
|
@Autowired PersonRepository personRepository;
|
||||||
|
@Autowired TagRepository tagRepository;
|
||||||
|
@Autowired AnnotationRepository annotationRepository;
|
||||||
|
@Autowired TranscriptionBlockRepository blockRepository;
|
||||||
|
@Autowired EntityManager em;
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
void setUp() {
|
||||||
|
blockRepository.deleteAll();
|
||||||
|
documentRepository.deleteAll();
|
||||||
|
personRepository.deleteAll();
|
||||||
|
tagRepository.deleteAll();
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Guard ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void german_text_search_config_is_available() {
|
||||||
|
Number count = (Number) em
|
||||||
|
.createNativeQuery("SELECT count(*) FROM pg_ts_config WHERE cfgname = 'german'")
|
||||||
|
.getSingleResult();
|
||||||
|
assertThat(count.longValue()).isEqualTo(1L);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Basic FTS ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void should_find_document_by_exact_title_word() {
|
||||||
|
documentRepository.saveAndFlush(document("Alter Brief"));
|
||||||
|
em.clear();
|
||||||
|
|
||||||
|
List<UUID> ids = documentRepository.findRankedIdsByFts("Brief");
|
||||||
|
|
||||||
|
assertThat(ids).hasSize(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void should_find_document_by_stemmed_inflected_form() {
|
||||||
|
documentRepository.saveAndFlush(document("Alter Brief"));
|
||||||
|
em.clear();
|
||||||
|
|
||||||
|
List<UUID> ids = documentRepository.findRankedIdsByFts("Briefe");
|
||||||
|
|
||||||
|
assertThat(ids).hasSize(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void should_not_find_document_when_term_absent() {
|
||||||
|
documentRepository.saveAndFlush(document("Familienfoto"));
|
||||||
|
em.clear();
|
||||||
|
|
||||||
|
List<UUID> ids = documentRepository.findRankedIdsByFts("Brief");
|
||||||
|
|
||||||
|
assertThat(ids).isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Transcription blocks ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void should_find_document_by_transcription_block_text() {
|
||||||
|
Document doc = documentRepository.saveAndFlush(document("Foto ohne Text"));
|
||||||
|
UUID annotationId = annotation(doc.getId());
|
||||||
|
|
||||||
|
blockRepository.saveAndFlush(block(doc.getId(), annotationId, "Liebe Anna ich schreibe dir aus dem Krieg", 0));
|
||||||
|
em.flush();
|
||||||
|
em.clear();
|
||||||
|
|
||||||
|
List<UUID> ids = documentRepository.findRankedIdsByFts("schreiben");
|
||||||
|
|
||||||
|
assertThat(ids).contains(doc.getId());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void should_rebuild_vector_when_transcription_block_inserted_after_document() {
|
||||||
|
Document doc = documentRepository.saveAndFlush(document("Leeres Dokument"));
|
||||||
|
em.clear();
|
||||||
|
|
||||||
|
assertThat(documentRepository.findRankedIdsByFts("Grundbuch")).isEmpty();
|
||||||
|
|
||||||
|
UUID annotationId = annotation(doc.getId());
|
||||||
|
blockRepository.saveAndFlush(block(doc.getId(), annotationId, "Grundbuch Eintrag 1923", 0));
|
||||||
|
em.flush();
|
||||||
|
em.clear();
|
||||||
|
|
||||||
|
assertThat(documentRepository.findRankedIdsByFts("Grundbuch")).contains(doc.getId());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void should_rebuild_vector_when_transcription_block_deleted() {
|
||||||
|
Document doc = documentRepository.saveAndFlush(document("Dokument mit Block"));
|
||||||
|
UUID annotationId = annotation(doc.getId());
|
||||||
|
TranscriptionBlock block = blockRepository.saveAndFlush(
|
||||||
|
block(doc.getId(), annotationId, "Grundbuch Eintrag 1923", 0));
|
||||||
|
em.flush();
|
||||||
|
em.clear();
|
||||||
|
|
||||||
|
assertThat(documentRepository.findRankedIdsByFts("Grundbuch")).contains(doc.getId());
|
||||||
|
|
||||||
|
blockRepository.deleteById(block.getId());
|
||||||
|
em.flush();
|
||||||
|
em.clear();
|
||||||
|
|
||||||
|
assertThat(documentRepository.findRankedIdsByFts("Grundbuch")).doesNotContain(doc.getId());
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Ranking ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void should_rank_title_match_above_transcription_match() {
|
||||||
|
// docA: "Grundbuch" only in title (weight A)
|
||||||
|
// docB: "Grundbuch" only in transcription block (weight B)
|
||||||
|
Document docA = documentRepository.saveAndFlush(document("Grundbuch 1923"));
|
||||||
|
Document docB = documentRepository.saveAndFlush(document("Anderes Dokument"));
|
||||||
|
UUID annotationId = annotation(docB.getId());
|
||||||
|
blockRepository.saveAndFlush(block(docB.getId(), annotationId, "Grundbuch steht darin", 0));
|
||||||
|
em.flush();
|
||||||
|
em.clear();
|
||||||
|
|
||||||
|
List<UUID> ids = documentRepository.findRankedIdsByFts("Grundbuch");
|
||||||
|
|
||||||
|
assertThat(ids).hasSize(2);
|
||||||
|
assertThat(ids.get(0)).isEqualTo(docA.getId());
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Edge cases ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void should_return_empty_when_query_contains_only_stop_words() {
|
||||||
|
documentRepository.saveAndFlush(document("Ein Brief von der Oma"));
|
||||||
|
em.clear();
|
||||||
|
|
||||||
|
List<UUID> ids = documentRepository.findRankedIdsByFts("der die das und");
|
||||||
|
|
||||||
|
assertThat(ids).isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void should_not_throw_when_query_contains_invalid_tsquery_syntax() {
|
||||||
|
documentRepository.saveAndFlush(document("Brief"));
|
||||||
|
em.clear();
|
||||||
|
|
||||||
|
assertThatNoException().isThrownBy(() -> documentRepository.findRankedIdsByFts("((("));
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Weight C: sender/receiver names ───────────────────────────────────────
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void should_find_document_by_receiver_name() {
|
||||||
|
Person receiver = personRepository.saveAndFlush(
|
||||||
|
Person.builder().firstName("Anna").lastName("Schmidt").build());
|
||||||
|
Document doc = documentRepository.saveAndFlush(Document.builder()
|
||||||
|
.title("Brief")
|
||||||
|
.originalFilename("brief.pdf")
|
||||||
|
.status(DocumentStatus.UPLOADED)
|
||||||
|
.receivers(Set.of(receiver))
|
||||||
|
.build());
|
||||||
|
em.flush();
|
||||||
|
em.clear();
|
||||||
|
|
||||||
|
List<UUID> ids = documentRepository.findRankedIdsByFts("Schmidt");
|
||||||
|
|
||||||
|
assertThat(ids).contains(doc.getId());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void should_find_document_by_sender_name() {
|
||||||
|
Person sender = personRepository.saveAndFlush(
|
||||||
|
Person.builder().firstName("Walter").lastName("Raddatz").build());
|
||||||
|
Document doc = documentRepository.saveAndFlush(Document.builder()
|
||||||
|
.title("Brief")
|
||||||
|
.originalFilename("brief.pdf")
|
||||||
|
.status(DocumentStatus.UPLOADED)
|
||||||
|
.sender(sender)
|
||||||
|
.build());
|
||||||
|
em.flush();
|
||||||
|
em.clear();
|
||||||
|
|
||||||
|
List<UUID> ids = documentRepository.findRankedIdsByFts("Raddatz");
|
||||||
|
|
||||||
|
assertThat(ids).contains(doc.getId());
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Weight D: tag names ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void should_find_document_by_tag_name() {
|
||||||
|
Tag tag = tagRepository.saveAndFlush(Tag.builder().name("Familiengeschichte").build());
|
||||||
|
documentRepository.saveAndFlush(Document.builder()
|
||||||
|
.title("Dokument")
|
||||||
|
.originalFilename("dokument.pdf")
|
||||||
|
.status(DocumentStatus.UPLOADED)
|
||||||
|
.tags(Set.of(tag))
|
||||||
|
.build());
|
||||||
|
em.flush();
|
||||||
|
em.clear();
|
||||||
|
|
||||||
|
List<UUID> ids = documentRepository.findRankedIdsByFts("Familiengeschichte");
|
||||||
|
|
||||||
|
assertThat(ids).hasSize(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Combined FTS + Specification filter ──────────────────────────────────
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void fts_combined_with_status_filter_excludes_non_matching_status() {
|
||||||
|
documentRepository.saveAndFlush(document("Grundbuch")); // UPLOADED
|
||||||
|
documentRepository.saveAndFlush(Document.builder()
|
||||||
|
.title("Grundbuch")
|
||||||
|
.originalFilename("grundbuch_ph.pdf")
|
||||||
|
.status(DocumentStatus.PLACEHOLDER)
|
||||||
|
.build());
|
||||||
|
em.flush();
|
||||||
|
em.clear();
|
||||||
|
|
||||||
|
List<UUID> rankedIds = documentRepository.findRankedIdsByFts("Grundbuch");
|
||||||
|
Specification<Document> spec = Specification.where(hasIds(rankedIds))
|
||||||
|
.and(hasStatus(DocumentStatus.UPLOADED));
|
||||||
|
|
||||||
|
List<Document> result = documentRepository.findAll(spec);
|
||||||
|
|
||||||
|
assertThat(result).hasSize(1);
|
||||||
|
assertThat(result.get(0).getStatus()).isEqualTo(DocumentStatus.UPLOADED);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Helpers ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
private Document document(String title) {
|
||||||
|
return Document.builder()
|
||||||
|
.title(title)
|
||||||
|
.originalFilename(title.replace(" ", "_") + ".pdf")
|
||||||
|
.status(DocumentStatus.UPLOADED)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
private UUID annotation(UUID documentId) {
|
||||||
|
DocumentAnnotation ann = annotationRepository.save(DocumentAnnotation.builder()
|
||||||
|
.documentId(documentId)
|
||||||
|
.pageNumber(1)
|
||||||
|
.x(0.1).y(0.2).width(0.3).height(0.4)
|
||||||
|
.color("#00C7B1")
|
||||||
|
.build());
|
||||||
|
em.flush();
|
||||||
|
return ann.getId();
|
||||||
|
}
|
||||||
|
|
||||||
|
private TranscriptionBlock block(UUID documentId, UUID annotationId, String text, int order) {
|
||||||
|
return TranscriptionBlock.builder()
|
||||||
|
.documentId(documentId)
|
||||||
|
.annotationId(annotationId)
|
||||||
|
.text(text)
|
||||||
|
.sortOrder(order)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -7,8 +7,6 @@ import org.raddatz.familienarchiv.config.FlywayConfig;
|
|||||||
import org.raddatz.familienarchiv.model.Document;
|
import org.raddatz.familienarchiv.model.Document;
|
||||||
import org.raddatz.familienarchiv.model.DocumentStatus;
|
import org.raddatz.familienarchiv.model.DocumentStatus;
|
||||||
import org.raddatz.familienarchiv.model.Person;
|
import org.raddatz.familienarchiv.model.Person;
|
||||||
import org.raddatz.familienarchiv.model.PersonNameAlias;
|
|
||||||
import org.raddatz.familienarchiv.model.PersonNameAliasType;
|
|
||||||
import org.raddatz.familienarchiv.model.Tag;
|
import org.raddatz.familienarchiv.model.Tag;
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
import org.springframework.boot.jdbc.test.autoconfigure.AutoConfigureTestDatabase;
|
import org.springframework.boot.jdbc.test.autoconfigure.AutoConfigureTestDatabase;
|
||||||
@@ -30,7 +28,6 @@ class DocumentSpecificationsTest {
|
|||||||
|
|
||||||
@Autowired DocumentRepository documentRepository;
|
@Autowired DocumentRepository documentRepository;
|
||||||
@Autowired PersonRepository personRepository;
|
@Autowired PersonRepository personRepository;
|
||||||
@Autowired PersonNameAliasRepository aliasRepository;
|
|
||||||
@Autowired TagRepository tagRepository;
|
@Autowired TagRepository tagRepository;
|
||||||
|
|
||||||
private Person sender;
|
private Person sender;
|
||||||
@@ -79,56 +76,6 @@ class DocumentSpecificationsTest {
|
|||||||
.build());
|
.build());
|
||||||
}
|
}
|
||||||
|
|
||||||
// ─── hasText ──────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void hasText_returnsAllDocuments_whenTextIsNull() {
|
|
||||||
List<Document> result = documentRepository.findAll(Specification.where(hasText(null)));
|
|
||||||
assertThat(result).hasSize(3);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void hasText_returnsAllDocuments_whenTextIsBlank() {
|
|
||||||
List<Document> result = documentRepository.findAll(Specification.where(hasText(" ")));
|
|
||||||
assertThat(result).hasSize(3);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void hasText_filtersOnTitle() {
|
|
||||||
List<Document> result = documentRepository.findAll(Specification.where(hasText("familienfoto")));
|
|
||||||
assertThat(result).extracting(Document::getTitle).containsExactly("Familienfoto");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void hasText_filtersOnOriginalFilename() {
|
|
||||||
List<Document> result = documentRepository.findAll(Specification.where(hasText("brief_late")));
|
|
||||||
assertThat(result).extracting(Document::getTitle).containsExactly("Neuerer Brief");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void hasText_filtersOnTranscription() {
|
|
||||||
List<Document> result = documentRepository.findAll(Specification.where(hasText("schreibe dir")));
|
|
||||||
assertThat(result).extracting(Document::getTitle).containsExactly("Alter Brief");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void hasText_filtersOnLocation() {
|
|
||||||
List<Document> result = documentRepository.findAll(Specification.where(hasText("berlin")));
|
|
||||||
assertThat(result).extracting(Document::getTitle).containsExactly("Alter Brief");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void hasText_isCaseInsensitive() {
|
|
||||||
List<Document> result = documentRepository.findAll(Specification.where(hasText("BRIEF")));
|
|
||||||
assertThat(result).extracting(Document::getTitle).containsExactlyInAnyOrder("Alter Brief", "Neuerer Brief");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void hasText_returnsEmpty_whenNoMatch() {
|
|
||||||
List<Document> result = documentRepository.findAll(Specification.where(hasText("xyznotexist")));
|
|
||||||
assertThat(result).isEmpty();
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── hasSender ────────────────────────────────────────────────────────────
|
// ─── hasSender ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@@ -253,36 +200,6 @@ class DocumentSpecificationsTest {
|
|||||||
assertThat(result).isEmpty();
|
assertThat(result).isEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
void hasText_findsByPartialSenderLastName() {
|
|
||||||
List<Document> result = documentRepository.findAll(Specification.where(hasText("üller")));
|
|
||||||
assertThat(result).extracting(Document::getTitle)
|
|
||||||
.containsExactlyInAnyOrder("Alter Brief", "Neuerer Brief");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void hasText_findsByPartialReceiverLastName() {
|
|
||||||
List<Document> result = documentRepository.findAll(Specification.where(hasText("schmid")));
|
|
||||||
assertThat(result).extracting(Document::getTitle).containsExactly("Alter Brief");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void hasText_findsByPartialTagName() {
|
|
||||||
List<Document> result = documentRepository.findAll(Specification.where(hasText("amili")));
|
|
||||||
assertThat(result).extracting(Document::getTitle)
|
|
||||||
.containsExactlyInAnyOrder("Alter Brief", "Familienfoto");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void hasText_doesNotProduceDuplicatesForDocumentWithMultipleReceivers() {
|
|
||||||
Person receiver2 = personRepository.save(Person.builder().firstName("Karl").lastName("Schmidt").build());
|
|
||||||
briefEarly.setReceivers(new java.util.HashSet<>(Set.of(receiver, receiver2)));
|
|
||||||
documentRepository.save(briefEarly);
|
|
||||||
|
|
||||||
List<Document> result = documentRepository.findAll(Specification.where(hasText("schmid")));
|
|
||||||
assertThat(result).hasSize(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// ─── hasTagPartial ────────────────────────────────────────────────────────
|
// ─── hasTagPartial ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@@ -329,26 +246,4 @@ class DocumentSpecificationsTest {
|
|||||||
assertThat(result).isEmpty();
|
assertThat(result).isEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
// ─── hasText with aliases ────────────────────────────────────────────────
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void hasText_findsDocumentBySenderAliasLastName() {
|
|
||||||
aliasRepository.save(PersonNameAlias.builder()
|
|
||||||
.person(sender).lastName("von Mueller").type(PersonNameAliasType.BIRTH).sortOrder(0).build());
|
|
||||||
|
|
||||||
List<Document> result = documentRepository.findAll(Specification.where(hasText("von Mueller")));
|
|
||||||
|
|
||||||
assertThat(result).isNotEmpty();
|
|
||||||
assertThat(result).extracting(Document::getTitle).contains("Alter Brief");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void hasText_findsDocumentByReceiverAliasLastName() {
|
|
||||||
aliasRepository.save(PersonNameAlias.builder()
|
|
||||||
.person(receiver).lastName("de Gruyter").type(PersonNameAliasType.BIRTH).sortOrder(0).build());
|
|
||||||
|
|
||||||
List<Document> result = documentRepository.findAll(Specification.where(hasText("de Gruyter")));
|
|
||||||
|
|
||||||
assertThat(result).isNotEmpty();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,100 @@
|
|||||||
|
package org.raddatz.familienarchiv.service;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
|
import org.mockito.InjectMocks;
|
||||||
|
import org.mockito.Mock;
|
||||||
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
import org.raddatz.familienarchiv.dto.DocumentSort;
|
||||||
|
import org.raddatz.familienarchiv.model.Document;
|
||||||
|
import org.raddatz.familienarchiv.model.DocumentStatus;
|
||||||
|
import org.raddatz.familienarchiv.repository.DocumentRepository;
|
||||||
|
import org.springframework.data.domain.Sort;
|
||||||
|
import org.springframework.data.jpa.domain.Specification;
|
||||||
|
|
||||||
|
import java.time.LocalDate;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
import static org.mockito.ArgumentMatchers.any;
|
||||||
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
|
@ExtendWith(MockitoExtension.class)
|
||||||
|
class DocumentServiceSortTest {
|
||||||
|
|
||||||
|
@Mock DocumentRepository documentRepository;
|
||||||
|
@Mock PersonService personService;
|
||||||
|
@Mock FileService fileService;
|
||||||
|
@Mock TagService tagService;
|
||||||
|
@Mock DocumentVersionService documentVersionService;
|
||||||
|
@Mock AnnotationService annotationService;
|
||||||
|
@InjectMocks DocumentService documentService;
|
||||||
|
|
||||||
|
// ─── searchDocuments — DATE sort ──────────────────────────────────────────
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void searchDocuments_with_DATE_sort_and_text_sorts_chronologically_not_by_relevance() {
|
||||||
|
UUID id1 = UUID.randomUUID(); // rank position 0 (higher relevance, older doc)
|
||||||
|
UUID id2 = UUID.randomUUID(); // rank position 1 (lower relevance, newer doc)
|
||||||
|
|
||||||
|
Document older = Document.builder().id(id1)
|
||||||
|
.title("Brief").status(DocumentStatus.UPLOADED)
|
||||||
|
.documentDate(LocalDate.of(1940, 1, 1)).build();
|
||||||
|
Document newer = Document.builder().id(id2)
|
||||||
|
.title("Brief").status(DocumentStatus.UPLOADED)
|
||||||
|
.documentDate(LocalDate.of(1960, 1, 1)).build();
|
||||||
|
|
||||||
|
// FTS returns id1 first (higher rank), id2 second
|
||||||
|
when(documentRepository.findRankedIdsByFts("Brief")).thenReturn(List.of(id1, id2));
|
||||||
|
// findAll(spec, sort) — the correct date path — returns date-DESC order
|
||||||
|
when(documentRepository.findAll(any(Specification.class), any(Sort.class)))
|
||||||
|
.thenReturn(List.of(newer, older));
|
||||||
|
|
||||||
|
List<Document> result = documentService.searchDocuments(
|
||||||
|
"Brief", null, null, null, null, null, null, null, DocumentSort.DATE, "DESC");
|
||||||
|
|
||||||
|
// Expect: date order (newer 1960 first), NOT rank order (older 1940 first)
|
||||||
|
assertThat(result).hasSize(2);
|
||||||
|
assertThat(result.get(0).getId()).isEqualTo(id2); // newer doc first
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── searchDocuments — RELEVANCE sort ─────────────────────────────────────
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void searchDocuments_with_RELEVANCE_sort_and_text_preserves_fts_rank_order() {
|
||||||
|
UUID id1 = UUID.randomUUID(); // rank position 0
|
||||||
|
UUID id2 = UUID.randomUUID(); // rank position 1
|
||||||
|
|
||||||
|
Document doc1 = Document.builder().id(id1).title("Brief").status(DocumentStatus.UPLOADED).build();
|
||||||
|
Document doc2 = Document.builder().id(id2).title("Brief").status(DocumentStatus.UPLOADED).build();
|
||||||
|
|
||||||
|
when(documentRepository.findRankedIdsByFts("Brief")).thenReturn(List.of(id1, id2));
|
||||||
|
when(documentRepository.findAll(any(Specification.class)))
|
||||||
|
.thenReturn(List.of(doc2, doc1)); // unordered from DB
|
||||||
|
|
||||||
|
List<Document> result = documentService.searchDocuments(
|
||||||
|
"Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null);
|
||||||
|
|
||||||
|
// Expect: rank order restored (id1 first)
|
||||||
|
assertThat(result.get(0).getId()).isEqualTo(id1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void searchDocuments_with_null_sort_and_text_defaults_to_fts_rank_order() {
|
||||||
|
UUID id1 = UUID.randomUUID();
|
||||||
|
UUID id2 = UUID.randomUUID();
|
||||||
|
|
||||||
|
Document doc1 = Document.builder().id(id1).title("Brief").status(DocumentStatus.UPLOADED).build();
|
||||||
|
Document doc2 = Document.builder().id(id2).title("Brief").status(DocumentStatus.UPLOADED).build();
|
||||||
|
|
||||||
|
when(documentRepository.findRankedIdsByFts("Brief")).thenReturn(List.of(id1, id2));
|
||||||
|
when(documentRepository.findAll(any(Specification.class)))
|
||||||
|
.thenReturn(List.of(doc2, doc1));
|
||||||
|
|
||||||
|
List<Document> result = documentService.searchDocuments(
|
||||||
|
"Brief", null, null, null, null, null, null, null, null, null);
|
||||||
|
|
||||||
|
assertThat(result.get(0).getId()).isEqualTo(id1);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -18,6 +18,7 @@ bun.lockb
|
|||||||
/src/lib/paraglide/
|
/src/lib/paraglide/
|
||||||
/src/lib/paraglide_bak*/
|
/src/lib/paraglide_bak*/
|
||||||
/src/paraglide/
|
/src/paraglide/
|
||||||
|
/project.inlang/
|
||||||
|
|
||||||
# Test artifacts
|
# Test artifacts
|
||||||
/test-results/
|
/test-results/
|
||||||
|
|||||||
Reference in New Issue
Block a user