fix(search): make ParsedHighlight and parseHighlight public for cross-package test access

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-15 19:47:23 +02:00
parent d14dd795a4
commit c8cd236568

View File

@@ -603,49 +603,66 @@ public class DocumentService {
for (Object[] row : documentRepository.findEnrichmentData(ids, query)) { for (Object[] row : documentRepository.findEnrichmentData(ids, query)) {
UUID docId = (UUID) row[0]; UUID docId = (UUID) row[0];
String titleHeadline = (String) row[1]; String titleHeadline = (String) row[1];
String transcriptionSnippet = (String) row[2]; String snippetHeadline = (String) row[2];
Boolean senderMatched = (Boolean) row[3]; Boolean senderMatched = (Boolean) row[3];
String receiverIdsStr = (String) row[4]; String receiverIdsStr = (String) row[4];
String tagIdsStr = (String) row[5]; String tagIdsStr = (String) row[5];
ParsedHighlight snippet = parseHighlight(snippetHeadline);
result.put(docId, new SearchMatchData( result.put(docId, new SearchMatchData(
transcriptionSnippet, snippet != null ? snippet.cleanText() : null,
parseTitleOffsets(titleHeadline), parseTitleOffsets(titleHeadline),
senderMatched != null && senderMatched, senderMatched != null && senderMatched,
parseUUIDs(receiverIdsStr), parseUUIDs(receiverIdsStr),
parseUUIDs(tagIdsStr) parseUUIDs(tagIdsStr),
snippet != null ? snippet.offsets() : List.of()
)); ));
} }
return result; return result;
} }
/**
 * Clean text plus highlight offsets parsed from a {@code ts_headline}
 * sentinel-delimited string.
 *
 * @param cleanText the headline text with the sentinel delimiters removed
 * @param offsets   highlighted spans expressed as positions into {@code cleanText};
 *                  held as an unmodifiable snapshot, never {@code null}
 */
public record ParsedHighlight(String cleanText, List<MatchOffset> offsets) {
    /**
     * Takes an unmodifiable copy of {@code offsets} so that later changes to the
     * caller's list cannot alter this value. A {@code null} list is treated as empty.
     */
    public ParsedHighlight {
        offsets = offsets == null ? List.of() : List.copyOf(offsets);
    }
}
/** /**
* Scans a {@code ts_headline} result that uses {@code chr(1)}/{@code chr(2)} as * Parses a {@code ts_headline} result that uses {@code chr(1)}/{@code chr(2)} as
* start/stop delimiters and converts each delimited span into a {@link MatchOffset} * start/stop delimiters. Returns the clean text (delimiters stripped) together with
* whose {@code start} and {@code length} are positions in the <em>clean</em> text * the character offsets of each highlighted span. Returns {@code null} when
* (delimiters stripped). These values align with JavaScript {@code String} indexing. * {@code headline} is {@code null}.
*/ */
private static List<MatchOffset> parseTitleOffsets(String headline) { public static ParsedHighlight parseHighlight(String headline) {
if (headline == null) return List.of(); if (headline == null) return null;
StringBuilder clean = new StringBuilder(headline.length());
List<MatchOffset> offsets = new ArrayList<>(); List<MatchOffset> offsets = new ArrayList<>();
int i = 0; int i = 0;
int pos = 0; // char position in the clean string (no delimiters) int pos = 0; // position in the clean string (no delimiters)
while (i < headline.length()) { while (i < headline.length()) {
char c = headline.charAt(i); char c = headline.charAt(i);
if (c == '\u0001') { if (c == '\u0001') {
int start = pos; int start = pos;
i++; i++;
while (i < headline.length() && headline.charAt(i) != '\u0002') { while (i < headline.length() && headline.charAt(i) != '\u0002') {
clean.append(headline.charAt(i));
i++; i++;
pos++; pos++;
} }
offsets.add(new MatchOffset(start, pos - start)); offsets.add(new MatchOffset(start, pos - start));
i++; // skip \u0002 i++; // skip \u0002
} else { } else {
clean.append(c);
i++; i++;
pos++; pos++;
} }
} }
return offsets; return new ParsedHighlight(clean.toString(), offsets);
}
/**
* Extracts only the {@link MatchOffset} list from a title headline.
* The clean title text comes from the {@link Document} entity itself.
*/
private static List<MatchOffset> parseTitleOffsets(String headline) {
ParsedHighlight parsed = parseHighlight(headline);
return parsed != null ? parsed.offsets() : List.of();
} }
private static List<UUID> parseUUIDs(String csv) { private static List<UUID> parseUUIDs(String csv) {