fix(search): make ParsedHighlight and parseHighlight public for cross-package test access
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -603,49 +603,66 @@ public class DocumentService {
|
|||||||
for (Object[] row : documentRepository.findEnrichmentData(ids, query)) {
|
for (Object[] row : documentRepository.findEnrichmentData(ids, query)) {
|
||||||
UUID docId = (UUID) row[0];
|
UUID docId = (UUID) row[0];
|
||||||
String titleHeadline = (String) row[1];
|
String titleHeadline = (String) row[1];
|
||||||
String transcriptionSnippet = (String) row[2];
|
String snippetHeadline = (String) row[2];
|
||||||
Boolean senderMatched = (Boolean) row[3];
|
Boolean senderMatched = (Boolean) row[3];
|
||||||
String receiverIdsStr = (String) row[4];
|
String receiverIdsStr = (String) row[4];
|
||||||
String tagIdsStr = (String) row[5];
|
String tagIdsStr = (String) row[5];
|
||||||
|
ParsedHighlight snippet = parseHighlight(snippetHeadline);
|
||||||
result.put(docId, new SearchMatchData(
|
result.put(docId, new SearchMatchData(
|
||||||
transcriptionSnippet,
|
snippet != null ? snippet.cleanText() : null,
|
||||||
parseTitleOffsets(titleHeadline),
|
parseTitleOffsets(titleHeadline),
|
||||||
senderMatched != null && senderMatched,
|
senderMatched != null && senderMatched,
|
||||||
parseUUIDs(receiverIdsStr),
|
parseUUIDs(receiverIdsStr),
|
||||||
parseUUIDs(tagIdsStr)
|
parseUUIDs(tagIdsStr),
|
||||||
|
snippet != null ? snippet.offsets() : List.of()
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Clean text + highlight offsets parsed from a {@code ts_headline} sentinel-delimited string. */
|
||||||
|
public record ParsedHighlight(String cleanText, List<MatchOffset> offsets) {
    // The record is public, so take an unmodifiable snapshot of the offsets: callers
    // that keep a reference to the list they passed in can no longer alter this value
    // after construction. A null list is normalized to an empty one instead of being
    // stored, so offsets() never returns null.
    public ParsedHighlight {
        offsets = offsets == null ? List.of() : List.copyOf(offsets);
    }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Scans a {@code ts_headline} result that uses {@code chr(1)}/{@code chr(2)} as
|
* Parses a {@code ts_headline} result that uses {@code chr(1)}/{@code chr(2)} as
|
||||||
* start/stop delimiters and converts each delimited span into a {@link MatchOffset}
|
* start/stop delimiters. Returns the clean text (delimiters stripped) together with
|
||||||
* whose {@code start} and {@code length} are positions in the <em>clean</em> text
|
* the character offsets of each highlighted span. Returns {@code null} when
|
||||||
* (delimiters stripped). These values align with JavaScript {@code String} indexing.
|
* {@code headline} is {@code null}.
|
||||||
*/
|
*/
|
||||||
private static List<MatchOffset> parseTitleOffsets(String headline) {
|
public static ParsedHighlight parseHighlight(String headline) {
|
||||||
if (headline == null) return List.of();
|
if (headline == null) return null;
|
||||||
|
StringBuilder clean = new StringBuilder(headline.length());
|
||||||
List<MatchOffset> offsets = new ArrayList<>();
|
List<MatchOffset> offsets = new ArrayList<>();
|
||||||
int i = 0;
|
int i = 0;
|
||||||
int pos = 0; // char position in the clean string (no delimiters)
|
int pos = 0; // position in the clean string (no delimiters)
|
||||||
while (i < headline.length()) {
|
while (i < headline.length()) {
|
||||||
char c = headline.charAt(i);
|
char c = headline.charAt(i);
|
||||||
if (c == '\u0001') {
|
if (c == '\u0001') {
|
||||||
int start = pos;
|
int start = pos;
|
||||||
i++;
|
i++;
|
||||||
while (i < headline.length() && headline.charAt(i) != '\u0002') {
|
while (i < headline.length() && headline.charAt(i) != '\u0002') {
|
||||||
|
clean.append(headline.charAt(i));
|
||||||
i++;
|
i++;
|
||||||
pos++;
|
pos++;
|
||||||
}
|
}
|
||||||
offsets.add(new MatchOffset(start, pos - start));
|
offsets.add(new MatchOffset(start, pos - start));
|
||||||
i++; // skip \u0002
|
i++; // skip \u0002
|
||||||
} else {
|
} else {
|
||||||
|
clean.append(c);
|
||||||
i++;
|
i++;
|
||||||
pos++;
|
pos++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return offsets;
|
return new ParsedHighlight(clean.toString(), offsets);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extracts only the {@link MatchOffset} list from a title headline.
|
||||||
|
* The clean title text comes from the {@link Document} entity itself.
|
||||||
|
*/
|
||||||
|
private static List<MatchOffset> parseTitleOffsets(String headline) {
|
||||||
|
ParsedHighlight parsed = parseHighlight(headline);
|
||||||
|
return parsed != null ? parsed.offsets() : List.of();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<UUID> parseUUIDs(String csv) {
|
private static List<UUID> parseUUIDs(String csv) {
|
||||||
|
|||||||
Reference in New Issue
Block a user