feat(search): enrich searchDocuments with per-document match data
DocumentService.searchDocuments now returns DocumentSearchResult with matchData populated from findEnrichmentData. Title highlights are parsed from chr(1)/chr(2) delimiters into MatchOffset lists; transcription snippet and sender/receiver/tag match flags are extracted from the same native SQL row. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -208,8 +208,7 @@ public class DocumentController {
|
||||
if (!"ASC".equalsIgnoreCase(dir) && !"DESC".equalsIgnoreCase(dir)) {
|
||||
throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "dir must be ASC or DESC");
|
||||
}
|
||||
List<Document> results = documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir);
|
||||
return ResponseEntity.ok(DocumentSearchResult.of(results));
|
||||
return ResponseEntity.ok(documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir));
|
||||
}
|
||||
|
||||
// --- TRAINING LABELS ---
|
||||
|
||||
@@ -3,10 +3,13 @@ package org.raddatz.familienarchiv.service;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.raddatz.familienarchiv.dto.DocumentSearchResult;
|
||||
import org.raddatz.familienarchiv.dto.DocumentSort;
|
||||
import org.raddatz.familienarchiv.dto.DocumentUpdateDTO;
|
||||
import org.raddatz.familienarchiv.dto.IncompleteDocumentDTO;
|
||||
import org.raddatz.familienarchiv.dto.MatchOffset;
|
||||
import org.raddatz.familienarchiv.dto.SearchMatchData;
|
||||
import org.raddatz.familienarchiv.model.Document;
|
||||
import org.raddatz.familienarchiv.dto.DocumentSort;
|
||||
import org.raddatz.familienarchiv.model.DocumentStatus;
|
||||
import org.raddatz.familienarchiv.model.ScriptType;
|
||||
import org.raddatz.familienarchiv.model.TrainingLabel;
|
||||
@@ -290,13 +293,13 @@ public class DocumentService {
|
||||
}
|
||||
|
||||
// 1. Allgemeine Suche (für das Suchfeld im Frontend)
|
||||
public List<Document> searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir) {
|
||||
public DocumentSearchResult searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir) {
|
||||
boolean hasText = StringUtils.hasText(text);
|
||||
List<UUID> rankedIds = null;
|
||||
|
||||
if (hasText) {
|
||||
rankedIds = documentRepository.findRankedIdsByFts(text);
|
||||
if (rankedIds.isEmpty()) return List.of();
|
||||
if (rankedIds.isEmpty()) return DocumentSearchResult.withMatchData(List.of(), Map.of());
|
||||
}
|
||||
|
||||
Specification<Document> textSpec = hasText ? hasIds(rankedIds) : (root, query, cb) -> null;
|
||||
@@ -312,11 +315,13 @@ public class DocumentService {
|
||||
// generates an INNER JOIN that silently drops documents with null sender/receivers.
|
||||
if (sort == DocumentSort.RECEIVER) {
|
||||
List<Document> results = documentRepository.findAll(spec);
|
||||
return sortByFirstReceiver(results, dir);
|
||||
List<Document> sorted = sortByFirstReceiver(results, dir);
|
||||
return DocumentSearchResult.withMatchData(sorted, enrichWithMatchData(sorted, text));
|
||||
}
|
||||
if (sort == DocumentSort.SENDER) {
|
||||
List<Document> results = documentRepository.findAll(spec);
|
||||
return sortBySender(results, dir);
|
||||
List<Document> sorted = sortBySender(results, dir);
|
||||
return DocumentSearchResult.withMatchData(sorted, enrichWithMatchData(sorted, text));
|
||||
}
|
||||
|
||||
// RELEVANCE: default when text present and no explicit sort given
|
||||
@@ -325,14 +330,16 @@ public class DocumentService {
|
||||
List<Document> results = documentRepository.findAll(spec);
|
||||
Map<UUID, Integer> rankMap = new HashMap<>();
|
||||
for (int i = 0; i < rankedIds.size(); i++) rankMap.put(rankedIds.get(i), i);
|
||||
return results.stream()
|
||||
List<Document> sorted = results.stream()
|
||||
.sorted(Comparator.comparingInt(
|
||||
doc -> rankMap.getOrDefault(doc.getId(), Integer.MAX_VALUE)))
|
||||
.toList();
|
||||
return DocumentSearchResult.withMatchData(sorted, enrichWithMatchData(sorted, text));
|
||||
}
|
||||
|
||||
Sort springSort = resolveSort(sort, dir);
|
||||
return documentRepository.findAll(spec, springSort);
|
||||
List<Document> results = documentRepository.findAll(spec, springSort);
|
||||
return DocumentSearchResult.withMatchData(results, enrichWithMatchData(results, text));
|
||||
}
|
||||
|
||||
private Sort resolveSort(DocumentSort sort, String dir) {
|
||||
@@ -584,6 +591,72 @@ public class DocumentService {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls {@code findEnrichmentData} and converts the raw Object[] rows into a
|
||||
* {@link SearchMatchData} per document. Short-circuits when the list is empty or
|
||||
* the query is blank (no text search active).
|
||||
*/
|
||||
private Map<UUID, SearchMatchData> enrichWithMatchData(List<Document> docs, String query) {
|
||||
if (docs.isEmpty() || !StringUtils.hasText(query)) return Map.of();
|
||||
List<UUID> ids = docs.stream().map(Document::getId).toList();
|
||||
Map<UUID, SearchMatchData> result = new HashMap<>();
|
||||
for (Object[] row : documentRepository.findEnrichmentData(ids, query)) {
|
||||
UUID docId = (UUID) row[0];
|
||||
String titleHeadline = (String) row[1];
|
||||
String transcriptionSnippet = (String) row[2];
|
||||
Boolean senderMatched = (Boolean) row[3];
|
||||
String receiverIdsStr = (String) row[4];
|
||||
String tagIdsStr = (String) row[5];
|
||||
result.put(docId, new SearchMatchData(
|
||||
transcriptionSnippet,
|
||||
parseTitleOffsets(titleHeadline),
|
||||
senderMatched != null && senderMatched,
|
||||
parseUUIDs(receiverIdsStr),
|
||||
parseUUIDs(tagIdsStr)
|
||||
));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Scans a {@code ts_headline} result that uses {@code chr(1)}/{@code chr(2)} as
|
||||
* start/stop delimiters and converts each delimited span into a {@link MatchOffset}
|
||||
* whose {@code start} and {@code length} are positions in the <em>clean</em> text
|
||||
* (delimiters stripped). These values align with JavaScript {@code String} indexing.
|
||||
*/
|
||||
private static List<MatchOffset> parseTitleOffsets(String headline) {
|
||||
if (headline == null) return List.of();
|
||||
List<MatchOffset> offsets = new ArrayList<>();
|
||||
int i = 0;
|
||||
int pos = 0; // char position in the clean string (no delimiters)
|
||||
while (i < headline.length()) {
|
||||
char c = headline.charAt(i);
|
||||
if (c == '\u0001') {
|
||||
int start = pos;
|
||||
i++;
|
||||
while (i < headline.length() && headline.charAt(i) != '\u0002') {
|
||||
i++;
|
||||
pos++;
|
||||
}
|
||||
offsets.add(new MatchOffset(start, pos - start));
|
||||
i++; // skip \u0002
|
||||
} else {
|
||||
i++;
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
return offsets;
|
||||
}
|
||||
|
||||
private static List<UUID> parseUUIDs(String csv) {
|
||||
if (csv == null || csv.isBlank()) return List.of();
|
||||
return Arrays.stream(csv.split(","))
|
||||
.map(String::trim)
|
||||
.filter(s -> !s.isEmpty())
|
||||
.map(UUID::fromString)
|
||||
.toList();
|
||||
}
|
||||
|
||||
private static String sha256Hex(byte[] bytes) {
|
||||
try {
|
||||
MessageDigest digest = MessageDigest.getInstance("SHA-256");
|
||||
|
||||
Reference in New Issue
Block a user