fix(search): make ParsedHighlight and parseHighlight public for cross-package test access

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-15 19:47:23 +02:00
parent d14dd795a4
commit c8cd236568
1 changed files with 28 additions and 11 deletions
--- a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java
@@ -603,49 +603,66 @@ public class DocumentService {
        for (Object[] row : documentRepository.findEnrichmentData(ids, query)) {
            UUID docId = (UUID) row[0];
            String titleHeadline = (String) row[1];
-            String transcriptionSnippet = (String) row[2];
+            String snippetHeadline = (String) row[2];
            Boolean senderMatched = (Boolean) row[3];
            String receiverIdsStr = (String) row[4];
            String tagIdsStr = (String) row[5];
+            ParsedHighlight snippet = parseHighlight(snippetHeadline);
            result.put(docId, new SearchMatchData(
-                    transcriptionSnippet,
+                    snippet != null ? snippet.cleanText() : null,
                    parseTitleOffsets(titleHeadline),
                    senderMatched != null && senderMatched,
                    parseUUIDs(receiverIdsStr),
-                    parseUUIDs(tagIdsStr)
+                    parseUUIDs(tagIdsStr),
+                    snippet != null ? snippet.offsets() : List.of()
            ));
        }
        return result;
    }

+    /** Clean text + highlight offsets parsed from a {@code ts_headline} sentinel-delimited string. */
+    public record ParsedHighlight(String cleanText, List<MatchOffset> offsets) {}
+
    /**
-     * Scans a {@code ts_headline} result that uses {@code chr(1)}/{@code chr(2)} as
-     * start/stop delimiters and converts each delimited span into a {@link MatchOffset}
-     * whose {@code start} and {@code length} are positions in the <em>clean</em> text
-     * (delimiters stripped). These values align with JavaScript {@code String} indexing.
+     * Parses a {@code ts_headline} result that uses {@code chr(1)}/{@code chr(2)} as
+     * start/stop delimiters. Returns the clean text (delimiters stripped) together with
+     * the start/length (in UTF-16 {@code char} units) of each highlighted span. Returns {@code null} when
+     * {@code headline} is {@code null}.
     */
-    private static List<MatchOffset> parseTitleOffsets(String headline) {
-        if (headline == null) return List.of();
+    public static ParsedHighlight parseHighlight(String headline) {
+        if (headline == null) return null;
+        StringBuilder clean = new StringBuilder(headline.length());
        List<MatchOffset> offsets = new ArrayList<>();
        int i = 0;
-        int pos = 0; // char position in the clean string (no delimiters)
+        int pos = 0; // position in the clean string (no delimiters)
        while (i < headline.length()) {
            char c = headline.charAt(i);
            if (c == '\u0001') {
                int start = pos;
                i++;
                while (i < headline.length() && headline.charAt(i) != '\u0002') {
+                    clean.append(headline.charAt(i));
                    i++;
                    pos++;
                }
                offsets.add(new MatchOffset(start, pos - start));
                i++; // skip \u0002
            } else {
+                clean.append(c);
                i++;
                pos++;
            }
        }
-        return offsets;
+        return new ParsedHighlight(clean.toString(), offsets);
+    }
+
+    /**
+     * Extracts only the {@link MatchOffset} list from a title headline.
+     * The clean title text comes from the {@link Document} entity itself.
+     */
+    private static List<MatchOffset> parseTitleOffsets(String headline) {
+        ParsedHighlight parsed = parseHighlight(headline);
+        return parsed != null ? parsed.offsets() : List.of();
    }

    private static List<UUID> parseUUIDs(String csv) {