feat(search): enrich searchDocuments with per-document match data

DocumentService.searchDocuments now returns DocumentSearchResult with matchData
populated from findEnrichmentData. Title highlights are parsed from chr(1)/chr(2)
delimiters into MatchOffset lists; transcription snippet and sender/receiver/tag
match flags are extracted from the same native SQL row.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-15 17:53:57 +02:00
parent 8526e6c0a1
commit 8c7ce147a0
5 changed files with 157 additions and 27 deletions

View File

@@ -208,8 +208,7 @@ public class DocumentController {
if (!"ASC".equalsIgnoreCase(dir) && !"DESC".equalsIgnoreCase(dir)) { if (!"ASC".equalsIgnoreCase(dir) && !"DESC".equalsIgnoreCase(dir)) {
throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "dir must be ASC or DESC"); throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "dir must be ASC or DESC");
} }
List<Document> results = documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir); return ResponseEntity.ok(documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir));
return ResponseEntity.ok(DocumentSearchResult.of(results));
} }
// --- TRAINING LABELS --- // --- TRAINING LABELS ---

View File

@@ -3,10 +3,13 @@ package org.raddatz.familienarchiv.service;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.raddatz.familienarchiv.dto.DocumentSearchResult;
import org.raddatz.familienarchiv.dto.DocumentSort;
import org.raddatz.familienarchiv.dto.DocumentUpdateDTO; import org.raddatz.familienarchiv.dto.DocumentUpdateDTO;
import org.raddatz.familienarchiv.dto.IncompleteDocumentDTO; import org.raddatz.familienarchiv.dto.IncompleteDocumentDTO;
import org.raddatz.familienarchiv.dto.MatchOffset;
import org.raddatz.familienarchiv.dto.SearchMatchData;
import org.raddatz.familienarchiv.model.Document; import org.raddatz.familienarchiv.model.Document;
import org.raddatz.familienarchiv.dto.DocumentSort;
import org.raddatz.familienarchiv.model.DocumentStatus; import org.raddatz.familienarchiv.model.DocumentStatus;
import org.raddatz.familienarchiv.model.ScriptType; import org.raddatz.familienarchiv.model.ScriptType;
import org.raddatz.familienarchiv.model.TrainingLabel; import org.raddatz.familienarchiv.model.TrainingLabel;
@@ -290,13 +293,13 @@ public class DocumentService {
} }
// 1. Allgemeine Suche (für das Suchfeld im Frontend) // 1. Allgemeine Suche (für das Suchfeld im Frontend)
public List<Document> searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir) { public DocumentSearchResult searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir) {
boolean hasText = StringUtils.hasText(text); boolean hasText = StringUtils.hasText(text);
List<UUID> rankedIds = null; List<UUID> rankedIds = null;
if (hasText) { if (hasText) {
rankedIds = documentRepository.findRankedIdsByFts(text); rankedIds = documentRepository.findRankedIdsByFts(text);
if (rankedIds.isEmpty()) return List.of(); if (rankedIds.isEmpty()) return DocumentSearchResult.withMatchData(List.of(), Map.of());
} }
Specification<Document> textSpec = hasText ? hasIds(rankedIds) : (root, query, cb) -> null; Specification<Document> textSpec = hasText ? hasIds(rankedIds) : (root, query, cb) -> null;
@@ -312,11 +315,13 @@ public class DocumentService {
// generates an INNER JOIN that silently drops documents with null sender/receivers. // generates an INNER JOIN that silently drops documents with null sender/receivers.
if (sort == DocumentSort.RECEIVER) { if (sort == DocumentSort.RECEIVER) {
List<Document> results = documentRepository.findAll(spec); List<Document> results = documentRepository.findAll(spec);
return sortByFirstReceiver(results, dir); List<Document> sorted = sortByFirstReceiver(results, dir);
return DocumentSearchResult.withMatchData(sorted, enrichWithMatchData(sorted, text));
} }
if (sort == DocumentSort.SENDER) { if (sort == DocumentSort.SENDER) {
List<Document> results = documentRepository.findAll(spec); List<Document> results = documentRepository.findAll(spec);
return sortBySender(results, dir); List<Document> sorted = sortBySender(results, dir);
return DocumentSearchResult.withMatchData(sorted, enrichWithMatchData(sorted, text));
} }
// RELEVANCE: default when text present and no explicit sort given // RELEVANCE: default when text present and no explicit sort given
@@ -325,14 +330,16 @@ public class DocumentService {
List<Document> results = documentRepository.findAll(spec); List<Document> results = documentRepository.findAll(spec);
Map<UUID, Integer> rankMap = new HashMap<>(); Map<UUID, Integer> rankMap = new HashMap<>();
for (int i = 0; i < rankedIds.size(); i++) rankMap.put(rankedIds.get(i), i); for (int i = 0; i < rankedIds.size(); i++) rankMap.put(rankedIds.get(i), i);
return results.stream() List<Document> sorted = results.stream()
.sorted(Comparator.comparingInt( .sorted(Comparator.comparingInt(
doc -> rankMap.getOrDefault(doc.getId(), Integer.MAX_VALUE))) doc -> rankMap.getOrDefault(doc.getId(), Integer.MAX_VALUE)))
.toList(); .toList();
return DocumentSearchResult.withMatchData(sorted, enrichWithMatchData(sorted, text));
} }
Sort springSort = resolveSort(sort, dir); Sort springSort = resolveSort(sort, dir);
return documentRepository.findAll(spec, springSort); List<Document> results = documentRepository.findAll(spec, springSort);
return DocumentSearchResult.withMatchData(results, enrichWithMatchData(results, text));
} }
private Sort resolveSort(DocumentSort sort, String dir) { private Sort resolveSort(DocumentSort sort, String dir) {
@@ -584,6 +591,72 @@ public class DocumentService {
return null; return null;
} }
/**
 * Looks up search-match details for the given documents and indexes them by document id.
 *
 * <p>Each row returned by {@code documentRepository.findEnrichmentData} is read positionally:
 * <ol start="0">
 *   <li>document id ({@link UUID})</li>
 *   <li>title headline, handed to {@code parseTitleOffsets}</li>
 *   <li>transcription snippet, passed through unchanged</li>
 *   <li>sender-matched flag; {@code null} is treated as {@code false}</li>
 *   <li>receiver ids as one string, handed to {@code parseUUIDs}</li>
 *   <li>tag ids as one string, handed to {@code parseUUIDs}</li>
 * </ol>
 *
 * @param docs  documents whose ids are sent to the enrichment query
 * @param query the search text; when it has no text the repository is not called
 * @return match data keyed by document id; an empty map when {@code docs} is empty
 *         or {@code query} has no text
 */
private Map<UUID, SearchMatchData> enrichWithMatchData(List<Document> docs, String query) {
    // Nothing to enrich without documents or without an active text search.
    if (docs.isEmpty() || !StringUtils.hasText(query)) {
        return Map.of();
    }

    List<UUID> documentIds = docs.stream().map(Document::getId).toList();
    Map<UUID, SearchMatchData> matchDataById = new HashMap<>();

    for (Object[] columns : documentRepository.findEnrichmentData(documentIds, query)) {
        // Cast every column up front so a type mismatch surfaces before any parsing starts.
        UUID documentId = (UUID) columns[0];
        String highlightedTitle = (String) columns[1];
        String snippet = (String) columns[2];
        Boolean senderFlag = (Boolean) columns[3];
        String receiverIdList = (String) columns[4];
        String tagIdList = (String) columns[5];

        SearchMatchData matchData = new SearchMatchData(
                snippet,
                parseTitleOffsets(highlightedTitle),
                Boolean.TRUE.equals(senderFlag),
                parseUUIDs(receiverIdList),
                parseUUIDs(tagIdList));
        matchDataById.put(documentId, matchData);
    }
    return matchDataById;
}
/**
 * Converts a highlighted headline into a list of {@link MatchOffset}s.
 *
 * <p>The headline marks each highlighted span with a {@code chr(1)} start delimiter and a
 * {@code chr(2)} stop delimiter. Every span becomes one {@link MatchOffset} whose
 * {@code start} and {@code length} are counted in {@code char} units of the <em>clean</em>
 * text, i.e. the headline with the span delimiters left out. Java {@code char} indices are
 * UTF-16 code units, the same unit JavaScript {@code String} indexing uses.
 *
 * <p>A span that is opened but never closed extends to the end of the headline. Delimiter
 * characters that do not open or close a span (a stop delimiter outside a span, a start
 * delimiter inside one) are counted like ordinary text.
 *
 * @param headline the delimited headline, may be {@code null}
 * @return the highlighted spans in order of appearance; an empty list for {@code null}
 */
private static List<MatchOffset> parseTitleOffsets(String headline) {
    if (headline == null) {
        return List.of();
    }

    final int notInSpan = -1;
    List<MatchOffset> spans = new ArrayList<>();
    int spanStart = notInSpan; // clean-text position where the current span began
    int cleanPos = 0;          // position in the text with span delimiters removed

    for (int idx = 0; idx < headline.length(); idx++) {
        char ch = headline.charAt(idx);
        boolean insideSpan = spanStart != notInSpan;

        if (!insideSpan && ch == '\u0001') {
            // Start delimiter: remember where the span begins, do not count the delimiter.
            spanStart = cleanPos;
        } else if (insideSpan && ch == '\u0002') {
            // Stop delimiter: emit the finished span, do not count the delimiter.
            spans.add(new MatchOffset(spanStart, cleanPos - spanStart));
            spanStart = notInSpan;
        } else {
            cleanPos++;
        }
    }

    // A span still open at the end of the input runs to the end of the clean text.
    if (spanStart != notInSpan) {
        spans.add(new MatchOffset(spanStart, cleanPos - spanStart));
    }
    return spans;
}
/**
 * Parses a comma-separated list of UUID strings.
 *
 * <p>Entries are trimmed and empty entries are dropped, so stray commas and surrounding
 * whitespace are tolerated.
 *
 * @param csv comma-separated UUIDs, may be {@code null} or blank
 * @return an unmodifiable list of the parsed UUIDs in input order; empty when {@code csv}
 *         is {@code null} or blank
 * @throws IllegalArgumentException if a non-empty entry is not a valid UUID string
 */
private static List<UUID> parseUUIDs(String csv) {
    if (csv == null || csv.isBlank()) {
        return List.of();
    }

    List<UUID> parsed = new ArrayList<>();
    for (String rawEntry : csv.split(",")) {
        String entry = rawEntry.trim();
        if (entry.isEmpty()) {
            continue; // tolerate stray commas such as "a,,b"
        }
        parsed.add(UUID.fromString(entry));
    }
    return List.copyOf(parsed);
}
private static String sha256Hex(byte[] bytes) { private static String sha256Hex(byte[] bytes) {
try { try {
MessageDigest digest = MessageDigest.getInstance("SHA-256"); MessageDigest digest = MessageDigest.getInstance("SHA-256");

View File

@@ -1,6 +1,7 @@
package org.raddatz.familienarchiv.controller; package org.raddatz.familienarchiv.controller;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.raddatz.familienarchiv.dto.DocumentSearchResult;
import org.raddatz.familienarchiv.dto.DocumentVersionSummary; import org.raddatz.familienarchiv.dto.DocumentVersionSummary;
import org.raddatz.familienarchiv.dto.IncompleteDocumentDTO; import org.raddatz.familienarchiv.dto.IncompleteDocumentDTO;
import org.raddatz.familienarchiv.model.Document; import org.raddatz.familienarchiv.model.Document;
@@ -61,7 +62,7 @@ class DocumentControllerTest {
@WithMockUser @WithMockUser
void search_returns200_whenAuthenticated() throws Exception { void search_returns200_whenAuthenticated() throws Exception {
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any())) when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
.thenReturn(Collections.emptyList()); .thenReturn(DocumentSearchResult.of(List.of()));
mockMvc.perform(get("/api/documents/search")) mockMvc.perform(get("/api/documents/search"))
.andExpect(status().isOk()); .andExpect(status().isOk());
@@ -71,7 +72,7 @@ class DocumentControllerTest {
@WithMockUser @WithMockUser
void search_withStatusParam_passesItToService() throws Exception { void search_withStatusParam_passesItToService() throws Exception {
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), eq(DocumentStatus.REVIEWED), any(), any())) when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), eq(DocumentStatus.REVIEWED), any(), any()))
.thenReturn(Collections.emptyList()); .thenReturn(DocumentSearchResult.of(List.of()));
mockMvc.perform(get("/api/documents/search").param("status", "REVIEWED")) mockMvc.perform(get("/api/documents/search").param("status", "REVIEWED"))
.andExpect(status().isOk()); .andExpect(status().isOk());
@@ -104,7 +105,7 @@ class DocumentControllerTest {
@WithMockUser @WithMockUser
void search_responseContainsTotalCount() throws Exception { void search_responseContainsTotalCount() throws Exception {
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any())) when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
.thenReturn(Collections.emptyList()); .thenReturn(DocumentSearchResult.of(List.of()));
mockMvc.perform(get("/api/documents/search")) mockMvc.perform(get("/api/documents/search"))
.andExpect(status().isOk()) .andExpect(status().isOk())

View File

@@ -5,6 +5,7 @@ import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.InjectMocks; import org.mockito.InjectMocks;
import org.mockito.Mock; import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension; import org.mockito.junit.jupiter.MockitoExtension;
import org.raddatz.familienarchiv.dto.DocumentSearchResult;
import org.raddatz.familienarchiv.dto.DocumentSort; import org.raddatz.familienarchiv.dto.DocumentSort;
import org.raddatz.familienarchiv.model.Document; import org.raddatz.familienarchiv.model.Document;
import org.raddatz.familienarchiv.model.DocumentStatus; import org.raddatz.familienarchiv.model.DocumentStatus;
@@ -51,12 +52,12 @@ class DocumentServiceSortTest {
when(documentRepository.findAll(any(Specification.class), any(Sort.class))) when(documentRepository.findAll(any(Specification.class), any(Sort.class)))
.thenReturn(List.of(newer, older)); .thenReturn(List.of(newer, older));
List<Document> result = documentService.searchDocuments( DocumentSearchResult result = documentService.searchDocuments(
"Brief", null, null, null, null, null, null, null, DocumentSort.DATE, "DESC"); "Brief", null, null, null, null, null, null, null, DocumentSort.DATE, "DESC");
// Expect: date order (newer 1960 first), NOT rank order (older 1940 first) // Expect: date order (newer 1960 first), NOT rank order (older 1940 first)
assertThat(result).hasSize(2); assertThat(result.documents()).hasSize(2);
assertThat(result.get(0).getId()).isEqualTo(id2); // newer doc first assertThat(result.documents().get(0).getId()).isEqualTo(id2); // newer doc first
} }
// ─── searchDocuments — RELEVANCE sort ───────────────────────────────────── // ─── searchDocuments — RELEVANCE sort ─────────────────────────────────────
@@ -73,11 +74,11 @@ class DocumentServiceSortTest {
when(documentRepository.findAll(any(Specification.class))) when(documentRepository.findAll(any(Specification.class)))
.thenReturn(List.of(doc2, doc1)); // unordered from DB .thenReturn(List.of(doc2, doc1)); // unordered from DB
List<Document> result = documentService.searchDocuments( DocumentSearchResult result = documentService.searchDocuments(
"Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null); "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null);
// Expect: rank order restored (id1 first) // Expect: rank order restored (id1 first)
assertThat(result.get(0).getId()).isEqualTo(id1); assertThat(result.documents().get(0).getId()).isEqualTo(id1);
} }
@Test @Test
@@ -92,9 +93,9 @@ class DocumentServiceSortTest {
when(documentRepository.findAll(any(Specification.class))) when(documentRepository.findAll(any(Specification.class)))
.thenReturn(List.of(doc2, doc1)); .thenReturn(List.of(doc2, doc1));
List<Document> result = documentService.searchDocuments( DocumentSearchResult result = documentService.searchDocuments(
"Brief", null, null, null, null, null, null, null, null, null); "Brief", null, null, null, null, null, null, null, null, null);
assertThat(result.get(0).getId()).isEqualTo(id1); assertThat(result.documents().get(0).getId()).isEqualTo(id1);
} }
} }

View File

@@ -6,11 +6,14 @@ import org.mockito.ArgumentCaptor;
import org.mockito.InjectMocks; import org.mockito.InjectMocks;
import org.mockito.Mock; import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension; import org.mockito.junit.jupiter.MockitoExtension;
import org.raddatz.familienarchiv.dto.DocumentSearchResult;
import org.raddatz.familienarchiv.dto.DocumentSort;
import org.raddatz.familienarchiv.dto.DocumentUpdateDTO; import org.raddatz.familienarchiv.dto.DocumentUpdateDTO;
import org.raddatz.familienarchiv.dto.IncompleteDocumentDTO; import org.raddatz.familienarchiv.dto.IncompleteDocumentDTO;
import org.raddatz.familienarchiv.dto.MatchOffset;
import org.raddatz.familienarchiv.dto.SearchMatchData;
import org.raddatz.familienarchiv.exception.DomainException; import org.raddatz.familienarchiv.exception.DomainException;
import org.raddatz.familienarchiv.model.Document; import org.raddatz.familienarchiv.model.Document;
import org.raddatz.familienarchiv.dto.DocumentSort;
import org.raddatz.familienarchiv.model.DocumentStatus; import org.raddatz.familienarchiv.model.DocumentStatus;
import org.raddatz.familienarchiv.model.Person; import org.raddatz.familienarchiv.model.Person;
import org.raddatz.familienarchiv.model.Tag; import org.raddatz.familienarchiv.model.Tag;
@@ -22,6 +25,7 @@ import org.springframework.data.domain.Sort;
import org.springframework.mock.web.MockMultipartFile; import org.springframework.mock.web.MockMultipartFile;
import java.time.LocalDate; import java.time.LocalDate;
import java.util.Collections;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Optional; import java.util.Optional;
@@ -1287,11 +1291,11 @@ class DocumentServiceTest {
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class))) when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class)))
.thenReturn(List.of(withSender, noSender)); .thenReturn(List.of(withSender, noSender));
List<Document> result = documentService.searchDocuments( DocumentSearchResult result = documentService.searchDocuments(
null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc"); null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc");
assertThat(result).hasSize(2); assertThat(result.documents()).hasSize(2);
assertThat(result).extracting(Document::getTitle).containsExactly("Has Sender", "No Sender"); assertThat(result.documents()).extracting(Document::getTitle).containsExactly("Has Sender", "No Sender");
} }
// ─── searchDocuments — RECEIVER sort, empty receivers ─────────────────────── // ─── searchDocuments — RECEIVER sort, empty receivers ───────────────────────
@@ -1307,10 +1311,10 @@ class DocumentServiceTest {
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class))) when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class)))
.thenReturn(List.of(noReceivers, withReceiver)); .thenReturn(List.of(noReceivers, withReceiver));
List<Document> result = documentService.searchDocuments( DocumentSearchResult result = documentService.searchDocuments(
null, null, null, null, null, null, null, null, DocumentSort.RECEIVER, "asc"); null, null, null, null, null, null, null, null, DocumentSort.RECEIVER, "asc");
assertThat(result).extracting(Document::getTitle) assertThat(result.documents()).extracting(Document::getTitle)
.containsExactly("Has Receiver", "No Receivers"); .containsExactly("Has Receiver", "No Receivers");
} }
@@ -1329,11 +1333,63 @@ class DocumentServiceTest {
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class))) when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class)))
.thenReturn(List.of(docNullName, docSmith)); .thenReturn(List.of(docNullName, docSmith));
List<Document> result = documentService.searchDocuments( DocumentSearchResult result = documentService.searchDocuments(
null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc"); null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc");
// null lastName should sort to end (treated as empty), not before "smith" (as "null") // null lastName should sort to end (treated as empty), not before "smith" (as "null")
assertThat(result).extracting(Document::getTitle) assertThat(result.documents()).extracting(Document::getTitle)
.containsExactly("smith doc", "Null lastname doc"); .containsExactly("smith doc", "Null lastname doc");
} }
// ─── searchDocuments — match data enrichment ──────────────────────────────
@Test
void searchDocuments_withTextQuery_includesMatchDataWithTitleOffsets() {
    // Arrange: one document whose enrichment row wraps "Brief" in the chr(1)/chr(2)
    // highlight delimiters; the remaining enrichment columns carry no match information.
    UUID letterId = UUID.randomUUID();
    Document letter = Document.builder().id(letterId).title("Brief an Anna").build();
    Object[] enrichmentRow = {letterId, "\u0001Brief\u0002 an Anna", null, false, null, null};
    List<Object[]> enrichmentRows = Collections.singletonList(enrichmentRow);

    when(documentRepository.findRankedIdsByFts("Brief")).thenReturn(List.of(letterId));
    when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class)))
            .thenReturn(List.of(letter));
    when(documentRepository.findEnrichmentData(any(), eq("Brief"))).thenReturn(enrichmentRows);

    // Act
    DocumentSearchResult searchResult = documentService.searchDocuments(
            "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null);

    // Assert: exactly one highlight covering the 5 chars of "Brief" at clean-text position 0.
    assertThat(searchResult.matchData()).containsKey(letterId);
    SearchMatchData matchData = searchResult.matchData().get(letterId);
    assertThat(matchData.titleOffsets()).containsExactly(new MatchOffset(0, 5));
}
@Test
void searchDocuments_withoutTextQuery_returnsEmptyMatchData() {
    // Arrange: the sorted repository lookup yields no documents.
    when(documentRepository.findAll(
            any(org.springframework.data.jpa.domain.Specification.class),
            any(Sort.class)))
            .thenReturn(List.of());

    // Act: every filter, including the search text, is left null.
    DocumentSearchResult searchResult = documentService.searchDocuments(
            null, null, null, null, null, null, null, null, null, null);

    // Assert: without a text query no match data is produced.
    assertThat(searchResult.matchData()).isEmpty();
}
@Test
void searchDocuments_withTextQuery_includesTranscriptionSnippetWhenPresent() {
    // Arrange: the enrichment row carries a transcription snippet and an unhighlighted title.
    UUID documentId = UUID.randomUUID();
    Document document = Document.builder().id(documentId).title("Dok").build();
    Object[] enrichmentRow = {documentId, "Dok", "Hier ist der Brief aus Berlin", false, null, null};
    List<Object[]> enrichmentRows = Collections.singletonList(enrichmentRow);

    when(documentRepository.findRankedIdsByFts("Brief")).thenReturn(List.of(documentId));
    when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class)))
            .thenReturn(List.of(document));
    when(documentRepository.findEnrichmentData(any(), eq("Brief"))).thenReturn(enrichmentRows);

    // Act
    DocumentSearchResult searchResult = documentService.searchDocuments(
            "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null);

    // Assert: the snippet from the enrichment row is exposed unchanged.
    SearchMatchData matchData = searchResult.matchData().get(documentId);
    assertThat(matchData.transcriptionSnippet()).isEqualTo("Hier ist der Brief aus Berlin");
}
} }