feat(fts): replace ILIKE hasText with FTS two-phase search and RELEVANCE sort

- DocumentSort: add RELEVANCE enum value
- DocumentSpecifications: remove hasText() ILIKE, add hasIds(List<UUID>)
  for FTS-pre-filtered ID sets
- DocumentService.searchDocuments(): FTS two-phase path — findRankedIdsByFts()
  returns ranked UUIDs (an empty FTS result short-circuits to an empty list),
  hasIds() narrows the subsequent Specification query, and an in-memory
  re-sort preserves rank order; rank order is applied when text is present
  and the sort is unset, RELEVANCE, or DATE, while RELEVANCE without text
  falls back to documentDate ordering in resolveSort()
- DocumentSpecificationsTest: remove hasText() tests (Specification removed)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-14 23:46:24 +02:00
parent 24530cf85b
commit 7d456d8e8b
4 changed files with 31 additions and 174 deletions

View File

@@ -1,5 +1,5 @@
package org.raddatz.familienarchiv.dto;
public enum DocumentSort {
DATE, TITLE, SENDER, RECEIVER, UPLOAD_DATE
DATE, TITLE, SENDER, RECEIVER, UPLOAD_DATE, RELEVANCE
}

View File

@@ -8,78 +8,17 @@ import java.util.UUID;
import org.raddatz.familienarchiv.model.Document;
import org.raddatz.familienarchiv.model.DocumentStatus;
import org.raddatz.familienarchiv.model.Person;
import org.raddatz.familienarchiv.model.PersonNameAlias;
import org.raddatz.familienarchiv.model.Tag;
import org.springframework.data.jpa.domain.Specification;
import org.springframework.util.StringUtils;
public class DocumentSpecifications {
// Filtert nach Text (in Titel, Dateiname, Transkription, Ort, Absender- und Empfängername, Tags)
public static Specification<Document> hasText(String text) {
// Filtert nach einer vorberechneten ID-Liste (aus FTS-Abfrage)
public static Specification<Document> hasIds(List<UUID> ids) {
return (root, query, cb) -> {
if (!StringUtils.hasText(text))
return null;
String likePattern = "%" + text.toLowerCase() + "%";
// LEFT JOIN on sender (ManyToOne — no duplicate rows)
Join<Document, Person> senderJoin = root.join("sender", JoinType.LEFT);
// LEFT JOIN sender → aliases (entity-graph navigation avoids a separate DB
// roundtrip while respecting domain boundaries — the alias table is part of
// the Person aggregate, navigated via @OneToMany, not via a cross-domain
// repository call from DocumentService)
Join<Person, PersonNameAlias> senderAliasJoin = senderJoin.join("nameAliases", JoinType.LEFT);
// EXISTS subquery for receiver name — avoids duplicate rows for multi-receiver docs
Subquery<Long> receiverSub = query.subquery(Long.class);
Root<Document> receiverRoot = receiverSub.from(Document.class);
Join<Document, Person> receiverJoin = receiverRoot.join("receivers");
receiverSub.select(cb.literal(1L))
.where(
cb.equal(receiverRoot.get("id"), root.get("id")),
cb.or(
cb.like(cb.lower(receiverJoin.get("lastName")), likePattern),
cb.like(cb.lower(cb.coalesce(receiverJoin.get("firstName"), "")), likePattern)
)
);
// EXISTS subquery for receiver alias name
Subquery<Long> receiverAliasSub = query.subquery(Long.class);
Root<Document> receiverAliasRoot = receiverAliasSub.from(Document.class);
Join<Document, Person> recAliasPersonJoin = receiverAliasRoot.join("receivers");
Join<Person, PersonNameAlias> recAliasJoin = recAliasPersonJoin.join("nameAliases");
receiverAliasSub.select(cb.literal(1L))
.where(
cb.equal(receiverAliasRoot.get("id"), root.get("id")),
cb.like(cb.lower(recAliasJoin.get("lastName")), likePattern)
);
// EXISTS subquery for tag name — avoids duplicate rows for multi-tag docs
Subquery<Long> tagSub = query.subquery(Long.class);
Root<Document> tagRoot = tagSub.from(Document.class);
Join<Document, Tag> tagJoin = tagRoot.join("tags");
tagSub.select(cb.literal(1L))
.where(
cb.equal(tagRoot.get("id"), root.get("id")),
cb.like(cb.lower(tagJoin.get("name")), likePattern)
);
query.distinct(true);
return cb.or(
cb.like(cb.lower(root.get("title")), likePattern),
cb.like(cb.lower(root.get("originalFilename")), likePattern),
cb.like(cb.lower(root.get("transcription")), likePattern),
cb.like(cb.lower(root.get("location")), likePattern),
cb.like(cb.lower(senderJoin.get("lastName")), likePattern),
cb.like(cb.lower(cb.coalesce(senderJoin.get("firstName"), "")), likePattern),
cb.like(cb.lower(senderAliasJoin.get("lastName")), likePattern),
cb.exists(receiverSub),
cb.exists(receiverAliasSub),
cb.exists(tagSub)
);
if (ids == null || ids.isEmpty()) return cb.disjunction();
return root.get("id").in(ids);
};
}

View File

@@ -20,6 +20,7 @@ import org.raddatz.familienarchiv.exception.DomainException;
import org.raddatz.familienarchiv.exception.ErrorCode;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.util.StringUtils;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
@@ -290,7 +291,16 @@ public class DocumentService {
// 1. Allgemeine Suche (für das Suchfeld im Frontend)
public List<Document> searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir) {
Specification<Document> spec = Specification.where(hasText(text))
boolean hasText = StringUtils.hasText(text);
List<UUID> rankedIds = null;
if (hasText) {
rankedIds = documentRepository.findRankedIdsByFts(text);
if (rankedIds.isEmpty()) return List.of();
}
Specification<Document> textSpec = hasText ? hasIds(rankedIds) : (root, query, cb) -> null;
Specification<Document> spec = Specification.where(textSpec)
.and(isBetween(from, to))
.and(hasSender(sender))
.and(hasReceiver(receiver))
@@ -300,7 +310,6 @@ public class DocumentService {
// SENDER and RECEIVER are sorted in-memory because JPA's Sort.by("sender.lastName")
// generates an INNER JOIN that silently drops documents with null sender/receivers.
// TODO: replace with a native @Query using ORDER BY ... NULLS LAST when pagination is added.
if (sort == DocumentSort.RECEIVER) {
List<Document> results = documentRepository.findAll(spec);
return sortByFirstReceiver(results, dir);
@@ -309,13 +318,27 @@ public class DocumentService {
List<Document> results = documentRepository.findAll(spec);
return sortBySender(results, dir);
}
// Rank order: applied when text is present and sort is unset, RELEVANCE, or DATE; TITLE and UPLOAD_DATE fall through to resolveSort(), SENDER and RECEIVER have already returned above
boolean useRankOrder = hasText && (sort == null || sort == DocumentSort.RELEVANCE || sort == DocumentSort.DATE);
if (useRankOrder) {
List<Document> results = documentRepository.findAll(spec);
final List<UUID> ids = rankedIds;
return results.stream()
.sorted(Comparator.comparingInt(doc -> {
int idx = ids.indexOf(doc.getId());
return idx < 0 ? Integer.MAX_VALUE : idx;
}))
.toList();
}
Sort springSort = resolveSort(sort, dir);
return documentRepository.findAll(spec, springSort);
}
private Sort resolveSort(DocumentSort sort, String dir) {
Sort.Direction direction = "ASC".equalsIgnoreCase(dir) ? Sort.Direction.ASC : Sort.Direction.DESC;
if (sort == null || sort == DocumentSort.DATE) {
if (sort == null || sort == DocumentSort.DATE || sort == DocumentSort.RELEVANCE) {
return Sort.by(direction, "documentDate");
}
// SENDER and RECEIVER are sorted in-memory before this method is called

View File

@@ -7,8 +7,6 @@ import org.raddatz.familienarchiv.config.FlywayConfig;
import org.raddatz.familienarchiv.model.Document;
import org.raddatz.familienarchiv.model.DocumentStatus;
import org.raddatz.familienarchiv.model.Person;
import org.raddatz.familienarchiv.model.PersonNameAlias;
import org.raddatz.familienarchiv.model.PersonNameAliasType;
import org.raddatz.familienarchiv.model.Tag;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.jdbc.test.autoconfigure.AutoConfigureTestDatabase;
@@ -30,7 +28,6 @@ class DocumentSpecificationsTest {
@Autowired DocumentRepository documentRepository;
@Autowired PersonRepository personRepository;
@Autowired PersonNameAliasRepository aliasRepository;
@Autowired TagRepository tagRepository;
private Person sender;
@@ -79,56 +76,6 @@ class DocumentSpecificationsTest {
.build());
}
// ─── hasText ──────────────────────────────────────────────────────────────
@Test
void hasText_returnsAllDocuments_whenTextIsNull() {
List<Document> result = documentRepository.findAll(Specification.where(hasText(null)));
assertThat(result).hasSize(3);
}
@Test
void hasText_returnsAllDocuments_whenTextIsBlank() {
List<Document> result = documentRepository.findAll(Specification.where(hasText(" ")));
assertThat(result).hasSize(3);
}
@Test
void hasText_filtersOnTitle() {
List<Document> result = documentRepository.findAll(Specification.where(hasText("familienfoto")));
assertThat(result).extracting(Document::getTitle).containsExactly("Familienfoto");
}
@Test
void hasText_filtersOnOriginalFilename() {
List<Document> result = documentRepository.findAll(Specification.where(hasText("brief_late")));
assertThat(result).extracting(Document::getTitle).containsExactly("Neuerer Brief");
}
@Test
void hasText_filtersOnTranscription() {
List<Document> result = documentRepository.findAll(Specification.where(hasText("schreibe dir")));
assertThat(result).extracting(Document::getTitle).containsExactly("Alter Brief");
}
@Test
void hasText_filtersOnLocation() {
List<Document> result = documentRepository.findAll(Specification.where(hasText("berlin")));
assertThat(result).extracting(Document::getTitle).containsExactly("Alter Brief");
}
@Test
void hasText_isCaseInsensitive() {
List<Document> result = documentRepository.findAll(Specification.where(hasText("BRIEF")));
assertThat(result).extracting(Document::getTitle).containsExactlyInAnyOrder("Alter Brief", "Neuerer Brief");
}
@Test
void hasText_returnsEmpty_whenNoMatch() {
List<Document> result = documentRepository.findAll(Specification.where(hasText("xyznotexist")));
assertThat(result).isEmpty();
}
// ─── hasSender ────────────────────────────────────────────────────────────
@Test
@@ -253,36 +200,6 @@ class DocumentSpecificationsTest {
assertThat(result).isEmpty();
}
@Test
void hasText_findsByPartialSenderLastName() {
List<Document> result = documentRepository.findAll(Specification.where(hasText("üller")));
assertThat(result).extracting(Document::getTitle)
.containsExactlyInAnyOrder("Alter Brief", "Neuerer Brief");
}
@Test
void hasText_findsByPartialReceiverLastName() {
List<Document> result = documentRepository.findAll(Specification.where(hasText("schmid")));
assertThat(result).extracting(Document::getTitle).containsExactly("Alter Brief");
}
@Test
void hasText_findsByPartialTagName() {
List<Document> result = documentRepository.findAll(Specification.where(hasText("amili")));
assertThat(result).extracting(Document::getTitle)
.containsExactlyInAnyOrder("Alter Brief", "Familienfoto");
}
@Test
void hasText_doesNotProduceDuplicatesForDocumentWithMultipleReceivers() {
Person receiver2 = personRepository.save(Person.builder().firstName("Karl").lastName("Schmidt").build());
briefEarly.setReceivers(new java.util.HashSet<>(Set.of(receiver, receiver2)));
documentRepository.save(briefEarly);
List<Document> result = documentRepository.findAll(Specification.where(hasText("schmid")));
assertThat(result).hasSize(1);
}
// ─── hasTagPartial ────────────────────────────────────────────────────────
@Test
@@ -329,26 +246,4 @@ class DocumentSpecificationsTest {
assertThat(result).isEmpty();
}
// ─── hasText with aliases ────────────────────────────────────────────────
@Test
void hasText_findsDocumentBySenderAliasLastName() {
aliasRepository.save(PersonNameAlias.builder()
.person(sender).lastName("von Mueller").type(PersonNameAliasType.BIRTH).sortOrder(0).build());
List<Document> result = documentRepository.findAll(Specification.where(hasText("von Mueller")));
assertThat(result).isNotEmpty();
assertThat(result).extracting(Document::getTitle).contains("Alter Brief");
}
@Test
void hasText_findsDocumentByReceiverAliasLastName() {
aliasRepository.save(PersonNameAlias.builder()
.person(receiver).lastName("de Gruyter").type(PersonNameAliasType.BIRTH).sortOrder(0).build());
List<Document> result = documentRepository.findAll(Specification.where(hasText("de Gruyter")));
assertThat(result).isNotEmpty();
}
}