fix(search): make ParsedHighlight and parseHighlight public for cross-package test access

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-15 19:47:23 +02:00
parent d14dd795a4
commit c8cd236568

View File

@@ -603,49 +603,66 @@ public class DocumentService {
for (Object[] row : documentRepository.findEnrichmentData(ids, query)) {
UUID docId = (UUID) row[0];
String titleHeadline = (String) row[1];
String transcriptionSnippet = (String) row[2];
String snippetHeadline = (String) row[2];
Boolean senderMatched = (Boolean) row[3];
String receiverIdsStr = (String) row[4];
String tagIdsStr = (String) row[5];
ParsedHighlight snippet = parseHighlight(snippetHeadline);
result.put(docId, new SearchMatchData(
transcriptionSnippet,
snippet != null ? snippet.cleanText() : null,
parseTitleOffsets(titleHeadline),
senderMatched != null && senderMatched,
parseUUIDs(receiverIdsStr),
parseUUIDs(tagIdsStr)
parseUUIDs(tagIdsStr),
snippet != null ? snippet.offsets() : List.of()
));
}
return result;
}
/** Clean text + highlight offsets parsed from a {@code ts_headline} sentinel-delimited string. */
public record ParsedHighlight(String cleanText, List<MatchOffset> offsets) {}
/**
* Scans a {@code ts_headline} result that uses {@code chr(1)}/{@code chr(2)} as
* start/stop delimiters and converts each delimited span into a {@link MatchOffset}
* whose {@code start} and {@code length} are positions in the <em>clean</em> text
* (delimiters stripped). These values align with JavaScript {@code String} indexing.
* Parses a {@code ts_headline} result that uses {@code chr(1)}/{@code chr(2)} as
* start/stop delimiters. Returns the clean text (delimiters stripped) together with
* the character offsets of each highlighted span. Returns {@code null} when
* {@code headline} is {@code null}.
*/
private static List<MatchOffset> parseTitleOffsets(String headline) {
if (headline == null) return List.of();
public static ParsedHighlight parseHighlight(String headline) {
if (headline == null) return null;
StringBuilder clean = new StringBuilder(headline.length());
List<MatchOffset> offsets = new ArrayList<>();
int i = 0;
int pos = 0; // char position in the clean string (no delimiters)
int pos = 0; // position in the clean string (no delimiters)
while (i < headline.length()) {
char c = headline.charAt(i);
if (c == '\u0001') {
int start = pos;
i++;
while (i < headline.length() && headline.charAt(i) != '\u0002') {
clean.append(headline.charAt(i));
i++;
pos++;
}
offsets.add(new MatchOffset(start, pos - start));
i++; // skip \u0002
} else {
clean.append(c);
i++;
pos++;
}
}
return offsets;
return new ParsedHighlight(clean.toString(), offsets);
}
/**
* Extracts only the {@link MatchOffset} list from a title headline.
* The clean title text comes from the {@link Document} entity itself.
*/
private static List<MatchOffset> parseTitleOffsets(String headline) {
ParsedHighlight parsed = parseHighlight(headline);
return parsed != null ? parsed.offsets() : List.of();
}
private static List<UUID> parseUUIDs(String csv) {