feat(fts): replace ILIKE hasText with FTS two-phase search and RELEVANCE sort
- DocumentSort: add RELEVANCE enum value
- DocumentSpecifications: remove hasText() ILIKE, add hasIds(List<UUID>) for FTS-pre-filtered ID sets
- DocumentService.searchDocuments(): FTS two-phase path — findRankedIdsByFts() returns ranked UUIDs, hasIds() narrows the subsequent Specification query, and an in-memory re-sort preserves rank order; RELEVANCE is the default when text is present and no explicit non-relevance sort is requested
- DocumentSpecificationsTest: remove hasText() tests (Specification removed)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
package org.raddatz.familienarchiv.dto;
|
||||
|
||||
public enum DocumentSort {
|
||||
DATE, TITLE, SENDER, RECEIVER, UPLOAD_DATE
|
||||
DATE, TITLE, SENDER, RECEIVER, UPLOAD_DATE, RELEVANCE
|
||||
}
|
||||
|
||||
@@ -8,78 +8,17 @@ import java.util.UUID;
|
||||
|
||||
import org.raddatz.familienarchiv.model.Document;
|
||||
import org.raddatz.familienarchiv.model.DocumentStatus;
|
||||
import org.raddatz.familienarchiv.model.Person;
|
||||
import org.raddatz.familienarchiv.model.PersonNameAlias;
|
||||
import org.raddatz.familienarchiv.model.Tag;
|
||||
import org.springframework.data.jpa.domain.Specification;
|
||||
import org.springframework.util.StringUtils;
|
||||
|
||||
public class DocumentSpecifications {
|
||||
|
||||
// Filtert nach Text (in Titel, Dateiname, Transkription, Ort, Absender- und Empfängername, Tags)
|
||||
public static Specification<Document> hasText(String text) {
|
||||
// Filtert nach einer vorberechneten ID-Liste (aus FTS-Abfrage)
|
||||
public static Specification<Document> hasIds(List<UUID> ids) {
|
||||
return (root, query, cb) -> {
|
||||
if (!StringUtils.hasText(text))
|
||||
return null;
|
||||
String likePattern = "%" + text.toLowerCase() + "%";
|
||||
|
||||
// LEFT JOIN on sender (ManyToOne — no duplicate rows)
|
||||
Join<Document, Person> senderJoin = root.join("sender", JoinType.LEFT);
|
||||
|
||||
// LEFT JOIN sender → aliases (entity-graph navigation avoids a separate DB
|
||||
// roundtrip while respecting domain boundaries — the alias table is part of
|
||||
// the Person aggregate, navigated via @OneToMany, not via a cross-domain
|
||||
// repository call from DocumentService)
|
||||
Join<Person, PersonNameAlias> senderAliasJoin = senderJoin.join("nameAliases", JoinType.LEFT);
|
||||
|
||||
// EXISTS subquery for receiver name — avoids duplicate rows for multi-receiver docs
|
||||
Subquery<Long> receiverSub = query.subquery(Long.class);
|
||||
Root<Document> receiverRoot = receiverSub.from(Document.class);
|
||||
Join<Document, Person> receiverJoin = receiverRoot.join("receivers");
|
||||
receiverSub.select(cb.literal(1L))
|
||||
.where(
|
||||
cb.equal(receiverRoot.get("id"), root.get("id")),
|
||||
cb.or(
|
||||
cb.like(cb.lower(receiverJoin.get("lastName")), likePattern),
|
||||
cb.like(cb.lower(cb.coalesce(receiverJoin.get("firstName"), "")), likePattern)
|
||||
)
|
||||
);
|
||||
|
||||
// EXISTS subquery for receiver alias name
|
||||
Subquery<Long> receiverAliasSub = query.subquery(Long.class);
|
||||
Root<Document> receiverAliasRoot = receiverAliasSub.from(Document.class);
|
||||
Join<Document, Person> recAliasPersonJoin = receiverAliasRoot.join("receivers");
|
||||
Join<Person, PersonNameAlias> recAliasJoin = recAliasPersonJoin.join("nameAliases");
|
||||
receiverAliasSub.select(cb.literal(1L))
|
||||
.where(
|
||||
cb.equal(receiverAliasRoot.get("id"), root.get("id")),
|
||||
cb.like(cb.lower(recAliasJoin.get("lastName")), likePattern)
|
||||
);
|
||||
|
||||
// EXISTS subquery for tag name — avoids duplicate rows for multi-tag docs
|
||||
Subquery<Long> tagSub = query.subquery(Long.class);
|
||||
Root<Document> tagRoot = tagSub.from(Document.class);
|
||||
Join<Document, Tag> tagJoin = tagRoot.join("tags");
|
||||
tagSub.select(cb.literal(1L))
|
||||
.where(
|
||||
cb.equal(tagRoot.get("id"), root.get("id")),
|
||||
cb.like(cb.lower(tagJoin.get("name")), likePattern)
|
||||
);
|
||||
|
||||
query.distinct(true);
|
||||
|
||||
return cb.or(
|
||||
cb.like(cb.lower(root.get("title")), likePattern),
|
||||
cb.like(cb.lower(root.get("originalFilename")), likePattern),
|
||||
cb.like(cb.lower(root.get("transcription")), likePattern),
|
||||
cb.like(cb.lower(root.get("location")), likePattern),
|
||||
cb.like(cb.lower(senderJoin.get("lastName")), likePattern),
|
||||
cb.like(cb.lower(cb.coalesce(senderJoin.get("firstName"), "")), likePattern),
|
||||
cb.like(cb.lower(senderAliasJoin.get("lastName")), likePattern),
|
||||
cb.exists(receiverSub),
|
||||
cb.exists(receiverAliasSub),
|
||||
cb.exists(tagSub)
|
||||
);
|
||||
if (ids == null || ids.isEmpty()) return cb.disjunction();
|
||||
return root.get("id").in(ids);
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import org.springframework.util.StringUtils;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import java.io.IOException;
|
||||
@@ -290,7 +291,16 @@ public class DocumentService {
|
||||
|
||||
// 1. Allgemeine Suche (für das Suchfeld im Frontend)
|
||||
public List<Document> searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir) {
|
||||
Specification<Document> spec = Specification.where(hasText(text))
|
||||
boolean hasText = StringUtils.hasText(text);
|
||||
List<UUID> rankedIds = null;
|
||||
|
||||
if (hasText) {
|
||||
rankedIds = documentRepository.findRankedIdsByFts(text);
|
||||
if (rankedIds.isEmpty()) return List.of();
|
||||
}
|
||||
|
||||
Specification<Document> textSpec = hasText ? hasIds(rankedIds) : (root, query, cb) -> null;
|
||||
Specification<Document> spec = Specification.where(textSpec)
|
||||
.and(isBetween(from, to))
|
||||
.and(hasSender(sender))
|
||||
.and(hasReceiver(receiver))
|
||||
@@ -300,7 +310,6 @@ public class DocumentService {
|
||||
|
||||
// SENDER and RECEIVER are sorted in-memory because JPA's Sort.by("sender.lastName")
|
||||
// generates an INNER JOIN that silently drops documents with null sender/receivers.
|
||||
// TODO: replace with a native @Query using ORDER BY ... NULLS LAST when pagination is added.
|
||||
if (sort == DocumentSort.RECEIVER) {
|
||||
List<Document> results = documentRepository.findAll(spec);
|
||||
return sortByFirstReceiver(results, dir);
|
||||
@@ -309,13 +318,27 @@ public class DocumentService {
|
||||
List<Document> results = documentRepository.findAll(spec);
|
||||
return sortBySender(results, dir);
|
||||
}
|
||||
|
||||
// RELEVANCE: FTS rank order is used when text is present and sort is null, RELEVANCE or DATE; other sort values fall through to resolveSort (SENDER/RECEIVER are handled above)
|
||||
boolean useRankOrder = hasText && (sort == null || sort == DocumentSort.RELEVANCE || sort == DocumentSort.DATE);
|
||||
if (useRankOrder) {
|
||||
List<Document> results = documentRepository.findAll(spec);
|
||||
final List<UUID> ids = rankedIds;
|
||||
return results.stream()
|
||||
.sorted(Comparator.comparingInt(doc -> {
|
||||
int idx = ids.indexOf(doc.getId());
|
||||
return idx < 0 ? Integer.MAX_VALUE : idx;
|
||||
}))
|
||||
.toList();
|
||||
}
|
||||
|
||||
Sort springSort = resolveSort(sort, dir);
|
||||
return documentRepository.findAll(spec, springSort);
|
||||
}
|
||||
|
||||
private Sort resolveSort(DocumentSort sort, String dir) {
|
||||
Sort.Direction direction = "ASC".equalsIgnoreCase(dir) ? Sort.Direction.ASC : Sort.Direction.DESC;
|
||||
if (sort == null || sort == DocumentSort.DATE) {
|
||||
if (sort == null || sort == DocumentSort.DATE || sort == DocumentSort.RELEVANCE) {
|
||||
return Sort.by(direction, "documentDate");
|
||||
}
|
||||
// SENDER and RECEIVER are sorted in-memory before this method is called
|
||||
|
||||
Reference in New Issue
Block a user