fix(search): make ParsedHighlight and parseHighlight public for cross-package test access
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -603,49 +603,66 @@ public class DocumentService {
|
||||
for (Object[] row : documentRepository.findEnrichmentData(ids, query)) {
|
||||
UUID docId = (UUID) row[0];
|
||||
String titleHeadline = (String) row[1];
|
||||
String transcriptionSnippet = (String) row[2];
|
||||
String snippetHeadline = (String) row[2];
|
||||
Boolean senderMatched = (Boolean) row[3];
|
||||
String receiverIdsStr = (String) row[4];
|
||||
String tagIdsStr = (String) row[5];
|
||||
ParsedHighlight snippet = parseHighlight(snippetHeadline);
|
||||
result.put(docId, new SearchMatchData(
|
||||
transcriptionSnippet,
|
||||
snippet != null ? snippet.cleanText() : null,
|
||||
parseTitleOffsets(titleHeadline),
|
||||
senderMatched != null && senderMatched,
|
||||
parseUUIDs(receiverIdsStr),
|
||||
parseUUIDs(tagIdsStr)
|
||||
parseUUIDs(tagIdsStr),
|
||||
snippet != null ? snippet.offsets() : List.of()
|
||||
));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/** Clean text + highlight offsets parsed from a {@code ts_headline} sentinel-delimited string. */
|
||||
public record ParsedHighlight(String cleanText, List<MatchOffset> offsets) {
    /**
     * Takes an unmodifiable snapshot of {@code offsets} so that users of this public
     * record cannot alter a parsed result through the list they passed in or through
     * the list returned by {@link #offsets()}. A {@code null} list is stored as an
     * empty list.
     *
     * @throws NullPointerException if {@code offsets} contains a {@code null} element
     */
    public ParsedHighlight {
        offsets = offsets == null ? List.of() : List.copyOf(offsets);
    }
}
|
||||
|
||||
/**
|
||||
* Scans a {@code ts_headline} result that uses {@code chr(1)}/{@code chr(2)} as
|
||||
* start/stop delimiters and converts each delimited span into a {@link MatchOffset}
|
||||
* whose {@code start} and {@code length} are positions in the <em>clean</em> text
|
||||
* (delimiters stripped). These values align with JavaScript {@code String} indexing.
|
||||
* Parses a {@code ts_headline} result that uses {@code chr(1)}/{@code chr(2)} as
|
||||
* start/stop delimiters. Returns the clean text (delimiters stripped) together with
|
||||
* the character offsets of each highlighted span. Returns {@code null} when
|
||||
* {@code headline} is {@code null}.
|
||||
*/
|
||||
private static List<MatchOffset> parseTitleOffsets(String headline) {
|
||||
if (headline == null) return List.of();
|
||||
public static ParsedHighlight parseHighlight(String headline) {
|
||||
if (headline == null) return null;
|
||||
StringBuilder clean = new StringBuilder(headline.length());
|
||||
List<MatchOffset> offsets = new ArrayList<>();
|
||||
int i = 0;
|
||||
int pos = 0; // char position in the clean string (no delimiters)
|
||||
int pos = 0; // position in the clean string (no delimiters)
|
||||
while (i < headline.length()) {
|
||||
char c = headline.charAt(i);
|
||||
if (c == '\u0001') {
|
||||
int start = pos;
|
||||
i++;
|
||||
while (i < headline.length() && headline.charAt(i) != '\u0002') {
|
||||
clean.append(headline.charAt(i));
|
||||
i++;
|
||||
pos++;
|
||||
}
|
||||
offsets.add(new MatchOffset(start, pos - start));
|
||||
i++; // skip \u0002
|
||||
} else {
|
||||
clean.append(c);
|
||||
i++;
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
return offsets;
|
||||
return new ParsedHighlight(clean.toString(), offsets);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts only the {@link MatchOffset} list from a title headline.
|
||||
* The clean title text comes from the {@link Document} entity itself.
|
||||
*/
|
||||
private static List<MatchOffset> parseTitleOffsets(String headline) {
|
||||
ParsedHighlight parsed = parseHighlight(headline);
|
||||
return parsed != null ? parsed.offsets() : List.of();
|
||||
}
|
||||
|
||||
private static List<UUID> parseUUIDs(String csv) {
|
||||
|
||||
Reference in New Issue
Block a user