feat(search): add snippetOffsets to SearchMatchData and use ts_headline for highlighted snippets
- SearchMatchData gains a 6th field snippetOffsets: List<MatchOffset> so the frontend
can render highlighted terms inside the transcription snippet without {#html}.
- DocumentRepository.findEnrichmentData now calls ts_headline() with chr(1)/chr(2)
sentinels instead of returning raw block text; parseHighlight() strips the sentinels
and produces clean text + MatchOffset list in one pass.
- DocumentService exposes ParsedHighlight and parseHighlight() as public so they can be
called from cross-package integration tests.
- All related tests updated to the new 6-argument SearchMatchData constructor and
to call parseHighlight() for asserting the snippet clean text and offsets.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -39,10 +39,17 @@ public record SearchMatchData(
|
||||
* IDs of tags whose names matched the query.
|
||||
*/
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<UUID> matchedTagIds
|
||||
List<UUID> matchedTagIds,
|
||||
|
||||
/**
|
||||
* Character offsets of highlighted terms within the transcription snippet.
|
||||
* Empty when no transcription block matched or the snippet has no highlights.
|
||||
*/
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<MatchOffset> snippetOffsets
|
||||
) {
|
||||
/** Canonical "no match data" value for a single document. */
|
||||
public static SearchMatchData empty() {
|
||||
return new SearchMatchData(null, List.of(), false, List.of(), List.of());
|
||||
return new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -95,7 +95,7 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
|
||||
* <ol>
|
||||
* <li>UUID — document id</li>
|
||||
* <li>String — title headline with \x01/\x02 delimiters around matched terms</li>
|
||||
* <li>String — best-ranked matching transcription block text, or null</li>
|
||||
* <li>String — best-ranked transcription snippet with \x01/\x02 delimiters, or null</li>
|
||||
* <li>Boolean — whether the sender's name matched the query</li>
|
||||
* <li>String — comma-separated matched receiver UUIDs, or null</li>
|
||||
* <li>String — comma-separated matched tag UUIDs, or null</li>
|
||||
@@ -108,7 +108,10 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
|
||||
ts_headline('german', d.title, websearch_to_tsquery('german', :query),
|
||||
'StartSel=' || chr(1) || ',StopSel=' || chr(2) || ',HighlightAll=true')
|
||||
AS title_headline,
|
||||
best_block.text AS transcription_snippet,
|
||||
CASE WHEN best_block.text IS NOT NULL THEN
|
||||
ts_headline('german', best_block.text, websearch_to_tsquery('german', :query),
|
||||
'StartSel=' || chr(1) || ',StopSel=' || chr(2) || ',MaxWords=50,MinWords=20')
|
||||
END AS transcription_snippet,
|
||||
(s.id IS NOT NULL AND
|
||||
to_tsvector('german', COALESCE(s.first_name, '') || ' ' || COALESCE(s.last_name, ''))
|
||||
@@ websearch_to_tsquery('german', :query))
|
||||
|
||||
Reference in New Issue
Block a user