From 8c7ce147a00b17619ae6e5ce2d6f2be5ac1a115a Mon Sep 17 00:00:00 2001 From: Marcel Date: Wed, 15 Apr 2026 17:53:57 +0200 Subject: [PATCH] feat(search): enrich searchDocuments with per-document match data DocumentService.searchDocuments now returns DocumentSearchResult with matchData populated from findEnrichmentData. Title highlights are parsed from chr(1)/chr(2) delimiters into MatchOffset lists; the transcription snippet, the sender-match flag, and the receiver and tag ID lists are extracted from the same native SQL row. Co-Authored-By: Claude Sonnet 4.6 --- .../controller/DocumentController.java | 3 +- .../service/DocumentService.java | 87 +++++++++++++++++-- .../controller/DocumentControllerTest.java | 7 +- .../service/DocumentServiceSortTest.java | 15 ++-- .../service/DocumentServiceTest.java | 72 +++++++++++++-- 5 files changed, 157 insertions(+), 27 deletions(-) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/controller/DocumentController.java b/backend/src/main/java/org/raddatz/familienarchiv/controller/DocumentController.java index 0bff2476..91e3c250 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/controller/DocumentController.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/controller/DocumentController.java @@ -208,8 +208,7 @@ public class DocumentController { if (!"ASC".equalsIgnoreCase(dir) && !"DESC".equalsIgnoreCase(dir)) { throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "dir must be ASC or DESC"); } - List results = documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir); - return ResponseEntity.ok(DocumentSearchResult.of(results)); + return ResponseEntity.ok(documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir)); } // --- TRAINING LABELS --- diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java index 46d4a4de..a028a3a5 100644 
--- a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java @@ -3,10 +3,13 @@ package org.raddatz.familienarchiv.service; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.raddatz.familienarchiv.dto.DocumentSearchResult; +import org.raddatz.familienarchiv.dto.DocumentSort; import org.raddatz.familienarchiv.dto.DocumentUpdateDTO; import org.raddatz.familienarchiv.dto.IncompleteDocumentDTO; +import org.raddatz.familienarchiv.dto.MatchOffset; +import org.raddatz.familienarchiv.dto.SearchMatchData; import org.raddatz.familienarchiv.model.Document; -import org.raddatz.familienarchiv.dto.DocumentSort; import org.raddatz.familienarchiv.model.DocumentStatus; import org.raddatz.familienarchiv.model.ScriptType; import org.raddatz.familienarchiv.model.TrainingLabel; @@ -290,13 +293,13 @@ public class DocumentService { } // 1. Allgemeine Suche (für das Suchfeld im Frontend) - public List searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir) { + public DocumentSearchResult searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir) { boolean hasText = StringUtils.hasText(text); List rankedIds = null; if (hasText) { rankedIds = documentRepository.findRankedIdsByFts(text); - if (rankedIds.isEmpty()) return List.of(); + if (rankedIds.isEmpty()) return DocumentSearchResult.withMatchData(List.of(), Map.of()); } Specification textSpec = hasText ? hasIds(rankedIds) : (root, query, cb) -> null; @@ -312,11 +315,13 @@ public class DocumentService { // generates an INNER JOIN that silently drops documents with null sender/receivers. 
if (sort == DocumentSort.RECEIVER) { List results = documentRepository.findAll(spec); - return sortByFirstReceiver(results, dir); + List sorted = sortByFirstReceiver(results, dir); + return DocumentSearchResult.withMatchData(sorted, enrichWithMatchData(sorted, text)); } if (sort == DocumentSort.SENDER) { List results = documentRepository.findAll(spec); - return sortBySender(results, dir); + List sorted = sortBySender(results, dir); + return DocumentSearchResult.withMatchData(sorted, enrichWithMatchData(sorted, text)); } // RELEVANCE: default when text present and no explicit sort given @@ -325,14 +330,16 @@ public class DocumentService { List results = documentRepository.findAll(spec); Map rankMap = new HashMap<>(); for (int i = 0; i < rankedIds.size(); i++) rankMap.put(rankedIds.get(i), i); - return results.stream() + List sorted = results.stream() .sorted(Comparator.comparingInt( doc -> rankMap.getOrDefault(doc.getId(), Integer.MAX_VALUE))) .toList(); + return DocumentSearchResult.withMatchData(sorted, enrichWithMatchData(sorted, text)); } Sort springSort = resolveSort(sort, dir); - return documentRepository.findAll(spec, springSort); + List results = documentRepository.findAll(spec, springSort); + return DocumentSearchResult.withMatchData(results, enrichWithMatchData(results, text)); } private Sort resolveSort(DocumentSort sort, String dir) { @@ -584,6 +591,72 @@ public class DocumentService { return null; } + /** + * Calls {@code findEnrichmentData} and converts the raw Object[] rows into a + * {@link SearchMatchData} per document. Short-circuits when the list is empty or + * the query is blank (no text search active). 
+ */ + private Map enrichWithMatchData(List docs, String query) { + if (docs.isEmpty() || !StringUtils.hasText(query)) return Map.of(); + List ids = docs.stream().map(Document::getId).toList(); + Map result = new HashMap<>(); + for (Object[] row : documentRepository.findEnrichmentData(ids, query)) { + UUID docId = (UUID) row[0]; + String titleHeadline = (String) row[1]; + String transcriptionSnippet = (String) row[2]; + Boolean senderMatched = (Boolean) row[3]; + String receiverIdsStr = (String) row[4]; + String tagIdsStr = (String) row[5]; + result.put(docId, new SearchMatchData( + transcriptionSnippet, + parseTitleOffsets(titleHeadline), + senderMatched != null && senderMatched, + parseUUIDs(receiverIdsStr), + parseUUIDs(tagIdsStr) + )); + } + return result; + } + + /** + * Scans a {@code ts_headline} result that uses {@code chr(1)}/{@code chr(2)} as + * start/stop delimiters and converts each delimited span into a {@link MatchOffset} + * whose {@code start} and {@code length} are positions in the clean text + * (delimiters stripped). These values align with JavaScript {@code String} indexing. 
+ */ + private static List parseTitleOffsets(String headline) { + if (headline == null) return List.of(); + List offsets = new ArrayList<>(); + int i = 0; + int pos = 0; // char position in the clean string (no delimiters) + while (i < headline.length()) { + char c = headline.charAt(i); + if (c == '\u0001') { + int start = pos; + i++; + while (i < headline.length() && headline.charAt(i) != '\u0002') { + i++; + pos++; + } + offsets.add(new MatchOffset(start, pos - start)); + i++; // skip \u0002 + } else { + i++; + pos++; + } + } + return offsets; + } + + private static List parseUUIDs(String csv) { + if (csv == null || csv.isBlank()) return List.of(); + return Arrays.stream(csv.split(",")) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .map(UUID::fromString) + .toList(); + } + private static String sha256Hex(byte[] bytes) { try { MessageDigest digest = MessageDigest.getInstance("SHA-256"); diff --git a/backend/src/test/java/org/raddatz/familienarchiv/controller/DocumentControllerTest.java b/backend/src/test/java/org/raddatz/familienarchiv/controller/DocumentControllerTest.java index 3b48e59d..dfce90af 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/controller/DocumentControllerTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/controller/DocumentControllerTest.java @@ -1,6 +1,7 @@ package org.raddatz.familienarchiv.controller; import org.junit.jupiter.api.Test; +import org.raddatz.familienarchiv.dto.DocumentSearchResult; import org.raddatz.familienarchiv.dto.DocumentVersionSummary; import org.raddatz.familienarchiv.dto.IncompleteDocumentDTO; import org.raddatz.familienarchiv.model.Document; @@ -61,7 +62,7 @@ class DocumentControllerTest { @WithMockUser void search_returns200_whenAuthenticated() throws Exception { when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any())) - .thenReturn(Collections.emptyList()); + .thenReturn(DocumentSearchResult.of(List.of())); 
mockMvc.perform(get("/api/documents/search")) .andExpect(status().isOk()); @@ -71,7 +72,7 @@ class DocumentControllerTest { @WithMockUser void search_withStatusParam_passesItToService() throws Exception { when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), eq(DocumentStatus.REVIEWED), any(), any())) - .thenReturn(Collections.emptyList()); + .thenReturn(DocumentSearchResult.of(List.of())); mockMvc.perform(get("/api/documents/search").param("status", "REVIEWED")) .andExpect(status().isOk()); @@ -104,7 +105,7 @@ class DocumentControllerTest { @WithMockUser void search_responseContainsTotalCount() throws Exception { when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any())) - .thenReturn(Collections.emptyList()); + .thenReturn(DocumentSearchResult.of(List.of())); mockMvc.perform(get("/api/documents/search")) .andExpect(status().isOk()) diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceSortTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceSortTest.java index 44a7a51e..f089635c 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceSortTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceSortTest.java @@ -5,6 +5,7 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.InjectMocks; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.raddatz.familienarchiv.dto.DocumentSearchResult; import org.raddatz.familienarchiv.dto.DocumentSort; import org.raddatz.familienarchiv.model.Document; import org.raddatz.familienarchiv.model.DocumentStatus; @@ -51,12 +52,12 @@ class DocumentServiceSortTest { when(documentRepository.findAll(any(Specification.class), any(Sort.class))) .thenReturn(List.of(newer, older)); - List result = documentService.searchDocuments( + DocumentSearchResult result = 
documentService.searchDocuments( "Brief", null, null, null, null, null, null, null, DocumentSort.DATE, "DESC"); // Expect: date order (newer 1960 first), NOT rank order (older 1940 first) - assertThat(result).hasSize(2); - assertThat(result.get(0).getId()).isEqualTo(id2); // newer doc first + assertThat(result.documents()).hasSize(2); + assertThat(result.documents().get(0).getId()).isEqualTo(id2); // newer doc first } // ─── searchDocuments — RELEVANCE sort ───────────────────────────────────── @@ -73,11 +74,11 @@ class DocumentServiceSortTest { when(documentRepository.findAll(any(Specification.class))) .thenReturn(List.of(doc2, doc1)); // unordered from DB - List result = documentService.searchDocuments( + DocumentSearchResult result = documentService.searchDocuments( "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null); // Expect: rank order restored (id1 first) - assertThat(result.get(0).getId()).isEqualTo(id1); + assertThat(result.documents().get(0).getId()).isEqualTo(id1); } @Test @@ -92,9 +93,9 @@ class DocumentServiceSortTest { when(documentRepository.findAll(any(Specification.class))) .thenReturn(List.of(doc2, doc1)); - List result = documentService.searchDocuments( + DocumentSearchResult result = documentService.searchDocuments( "Brief", null, null, null, null, null, null, null, null, null); - assertThat(result.get(0).getId()).isEqualTo(id1); + assertThat(result.documents().get(0).getId()).isEqualTo(id1); } } diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java index 67db2519..9d6d666b 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java @@ -6,11 +6,14 @@ import org.mockito.ArgumentCaptor; import org.mockito.InjectMocks; import org.mockito.Mock; import 
org.mockito.junit.jupiter.MockitoExtension; +import org.raddatz.familienarchiv.dto.DocumentSearchResult; +import org.raddatz.familienarchiv.dto.DocumentSort; import org.raddatz.familienarchiv.dto.DocumentUpdateDTO; import org.raddatz.familienarchiv.dto.IncompleteDocumentDTO; +import org.raddatz.familienarchiv.dto.MatchOffset; +import org.raddatz.familienarchiv.dto.SearchMatchData; import org.raddatz.familienarchiv.exception.DomainException; import org.raddatz.familienarchiv.model.Document; -import org.raddatz.familienarchiv.dto.DocumentSort; import org.raddatz.familienarchiv.model.DocumentStatus; import org.raddatz.familienarchiv.model.Person; import org.raddatz.familienarchiv.model.Tag; @@ -22,6 +25,7 @@ import org.springframework.data.domain.Sort; import org.springframework.mock.web.MockMultipartFile; import java.time.LocalDate; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Optional; @@ -1287,11 +1291,11 @@ class DocumentServiceTest { when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class))) .thenReturn(List.of(withSender, noSender)); - List result = documentService.searchDocuments( + DocumentSearchResult result = documentService.searchDocuments( null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc"); - assertThat(result).hasSize(2); - assertThat(result).extracting(Document::getTitle).containsExactly("Has Sender", "No Sender"); + assertThat(result.documents()).hasSize(2); + assertThat(result.documents()).extracting(Document::getTitle).containsExactly("Has Sender", "No Sender"); } // ─── searchDocuments — RECEIVER sort, empty receivers ─────────────────────── @@ -1307,10 +1311,10 @@ class DocumentServiceTest { when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class))) .thenReturn(List.of(noReceivers, withReceiver)); - List result = documentService.searchDocuments( + DocumentSearchResult result = 
documentService.searchDocuments( null, null, null, null, null, null, null, null, DocumentSort.RECEIVER, "asc"); - assertThat(result).extracting(Document::getTitle) + assertThat(result.documents()).extracting(Document::getTitle) .containsExactly("Has Receiver", "No Receivers"); } @@ -1329,11 +1333,63 @@ class DocumentServiceTest { when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class))) .thenReturn(List.of(docNullName, docSmith)); - List result = documentService.searchDocuments( + DocumentSearchResult result = documentService.searchDocuments( null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc"); // null lastName should sort to end (treated as empty), not before "smith" (as "null") - assertThat(result).extracting(Document::getTitle) + assertThat(result.documents()).extracting(Document::getTitle) .containsExactly("smith doc", "Null lastname doc"); } + + // ─── searchDocuments — match data enrichment ────────────────────────────── + + @Test + void searchDocuments_withTextQuery_includesMatchDataWithTitleOffsets() { + UUID docId = UUID.randomUUID(); + Document doc = Document.builder().id(docId).title("Brief an Anna").build(); + // chr(1)=\u0001 marks start, chr(2)=\u0002 marks end of highlighted term + List rows = Collections.singletonList(new Object[]{docId, "\u0001Brief\u0002 an Anna", null, false, null, null}); + + when(documentRepository.findRankedIdsByFts("Brief")).thenReturn(List.of(docId)); + when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class))) + .thenReturn(List.of(doc)); + when(documentRepository.findEnrichmentData(any(), eq("Brief"))).thenReturn(rows); + + DocumentSearchResult result = documentService.searchDocuments( + "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null); + + assertThat(result.matchData()).containsKey(docId); + SearchMatchData md = result.matchData().get(docId); + assertThat(md.titleOffsets()).hasSize(1); + 
assertThat(md.titleOffsets().get(0)).isEqualTo(new MatchOffset(0, 5)); // "Brief" = 5 chars at pos 0 + } + + @Test + void searchDocuments_withoutTextQuery_returnsEmptyMatchData() { + when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Sort.class))) + .thenReturn(List.of()); + + DocumentSearchResult result = documentService.searchDocuments( + null, null, null, null, null, null, null, null, null, null); + + assertThat(result.matchData()).isEmpty(); + } + + @Test + void searchDocuments_withTextQuery_includesTranscriptionSnippetWhenPresent() { + UUID docId = UUID.randomUUID(); + Document doc = Document.builder().id(docId).title("Dok").build(); + List rows = Collections.singletonList(new Object[]{docId, "Dok", "Hier ist der Brief aus Berlin", false, null, null}); + + when(documentRepository.findRankedIdsByFts("Brief")).thenReturn(List.of(docId)); + when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class))) + .thenReturn(List.of(doc)); + when(documentRepository.findEnrichmentData(any(), eq("Brief"))).thenReturn(rows); + + DocumentSearchResult result = documentService.searchDocuments( + "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null); + + SearchMatchData md = result.matchData().get(docId); + assertThat(md.transcriptionSnippet()).isEqualTo("Hier ist der Brief aus Berlin"); + } }