diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java index a028a3a5..f3cb6510 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java @@ -603,49 +603,66 @@ public class DocumentService { for (Object[] row : documentRepository.findEnrichmentData(ids, query)) { UUID docId = (UUID) row[0]; String titleHeadline = (String) row[1]; - String transcriptionSnippet = (String) row[2]; + String snippetHeadline = (String) row[2]; Boolean senderMatched = (Boolean) row[3]; String receiverIdsStr = (String) row[4]; String tagIdsStr = (String) row[5]; + ParsedHighlight snippet = parseHighlight(snippetHeadline); result.put(docId, new SearchMatchData( - transcriptionSnippet, + snippet != null ? snippet.cleanText() : null, parseTitleOffsets(titleHeadline), senderMatched != null && senderMatched, parseUUIDs(receiverIdsStr), - parseUUIDs(tagIdsStr) + parseUUIDs(tagIdsStr), + snippet != null ? snippet.offsets() : List.of() )); } return result; } + /** Clean text + highlight offsets parsed from a {@code ts_headline} sentinel-delimited string. */ + public record ParsedHighlight(String cleanText, List offsets) {} + /** - * Scans a {@code ts_headline} result that uses {@code chr(1)}/{@code chr(2)} as - * start/stop delimiters and converts each delimited span into a {@link MatchOffset} - * whose {@code start} and {@code length} are positions in the clean text - * (delimiters stripped). These values align with JavaScript {@code String} indexing. + * Parses a {@code ts_headline} result that uses {@code chr(1)}/{@code chr(2)} as + * start/stop delimiters. Returns the clean text (delimiters stripped) together with + * the character offsets of each highlighted span. Returns {@code null} when + * {@code headline} is {@code null}. */ - private static List parseTitleOffsets(String headline) { - if (headline == null) return List.of(); + public static ParsedHighlight parseHighlight(String headline) { + if (headline == null) return null; + StringBuilder clean = new StringBuilder(headline.length()); List offsets = new ArrayList<>(); int i = 0; - int pos = 0; // char position in the clean string (no delimiters) + int pos = 0; // position in the clean string (no delimiters) while (i < headline.length()) { char c = headline.charAt(i); if (c == '\u0001') { int start = pos; i++; while (i < headline.length() && headline.charAt(i) != '\u0002') { + clean.append(headline.charAt(i)); i++; pos++; } offsets.add(new MatchOffset(start, pos - start)); i++; // skip \u0002 } else { + clean.append(c); i++; pos++; } } - return offsets; + return new ParsedHighlight(clean.toString(), offsets); + } + + /** + * Extracts only the {@link MatchOffset} list from a title headline. + * The clean title text comes from the {@link Document} entity itself. + */ + private static List parseTitleOffsets(String headline) { + ParsedHighlight parsed = parseHighlight(headline); + return parsed != null ? parsed.offsets() : List.of(); } private static List parseUUIDs(String csv) {