feat(search): surface summary snippet when summary matched the query
Some checks failed
CI / Unit & Component Tests (push) Failing after 2m31s
CI / Backend Unit Tests (push) Failing after 2m39s
CI / Unit & Component Tests (pull_request) Failing after 2m27s
CI / Backend Unit Tests (pull_request) Failing after 4m45s

Add a summary_snippet column to findEnrichmentData, computed with ts_headline
on documents.summary and populated only when the summary's tsvector matches
the query. Expose it via SearchMatchData.summarySnippet / summaryOffsets and
render a "Zusammenfassung" / "Summary" / "Resumen" labelled row in the document
list, with the same treatment as the transcription snippet row.

Fixes the case where a document appeared in search results with no
visible match explanation (e.g. searching "frucht" found a document
whose summary mentioned "Früchte").

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-15 21:34:10 +02:00
parent b87036dde0
commit bca7822ab7
7 changed files with 51 additions and 20 deletions

View File

@@ -7,8 +7,8 @@ import java.util.UUID;
/** /**
* Match signals for a single document in a full-text search result. * Match signals for a single document in a full-text search result.
* All fields are non-null except {@code transcriptionSnippet}, which is null * All fields are non-null except {@code transcriptionSnippet} and {@code summarySnippet},
* when no transcription block matched the query. * which are null when the respective field did not match the query.
*/ */
public record SearchMatchData( public record SearchMatchData(
/** /**
@@ -46,10 +46,22 @@ public record SearchMatchData(
* Empty when no transcription block matched or the snippet has no highlights. * Empty when no transcription block matched or the snippet has no highlights.
*/ */
@Schema(requiredMode = Schema.RequiredMode.REQUIRED) @Schema(requiredMode = Schema.RequiredMode.REQUIRED)
List<MatchOffset> snippetOffsets List<MatchOffset> snippetOffsets,
/**
* Highlighted summary excerpt, or null if the summary did not match the query.
*/
String summarySnippet,
/**
* Character offsets of highlighted terms within the summary snippet.
* Empty when the summary did not match or has no highlights.
*/
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
List<MatchOffset> summaryOffsets
) { ) {
/** Canonical "no match data" value for a single document. */ /** Canonical "no match data" value for a single document. */
public static SearchMatchData empty() { public static SearchMatchData empty() {
return new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of()); return new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of(), null, List.of());
} }
} }

View File

@@ -607,14 +607,18 @@ public class DocumentService {
Boolean senderMatched = (Boolean) row[3]; Boolean senderMatched = (Boolean) row[3];
String receiverIdsStr = (String) row[4]; String receiverIdsStr = (String) row[4];
String tagIdsStr = (String) row[5]; String tagIdsStr = (String) row[5];
String summaryHeadline = (String) row[6];
ParsedHighlight snippet = parseHighlight(snippetHeadline); ParsedHighlight snippet = parseHighlight(snippetHeadline);
ParsedHighlight summary = parseHighlight(summaryHeadline);
result.put(docId, new SearchMatchData( result.put(docId, new SearchMatchData(
snippet != null ? snippet.cleanText() : null, snippet != null ? snippet.cleanText() : null,
parseTitleOffsets(titleHeadline), parseTitleOffsets(titleHeadline),
senderMatched != null && senderMatched, senderMatched != null && senderMatched,
parseUUIDs(receiverIdsStr), parseUUIDs(receiverIdsStr),
parseUUIDs(tagIdsStr), parseUUIDs(tagIdsStr),
snippet != null ? snippet.offsets() : List.of() snippet != null ? snippet.offsets() : List.of(),
summary != null ? summary.cleanText() : null,
summary != null ? summary.offsets() : List.of()
)); ));
} }
return result; return result;

View File

@@ -125,7 +125,7 @@ class DocumentControllerTest {
.status(DocumentStatus.UPLOADED) .status(DocumentStatus.UPLOADED)
.build(); .build();
var matchData = new org.raddatz.familienarchiv.dto.SearchMatchData( var matchData = new org.raddatz.familienarchiv.dto.SearchMatchData(
"Er schrieb einen langen Brief", List.of(), false, List.of(), List.of(), List.of()); "Er schrieb einen langen Brief", List.of(), false, List.of(), List.of(), List.of(), null, List.of());
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any())) when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
.thenReturn(DocumentSearchResult.withMatchData(List.of(doc), Map.of(docId, matchData))); .thenReturn(DocumentSearchResult.withMatchData(List.of(doc), Map.of(docId, matchData)));

View File

@@ -36,7 +36,7 @@ class DocumentSearchResultTest {
@Test @Test
void withMatchData_exposes_match_data_map() { void withMatchData_exposes_match_data_map() {
UUID id = UUID.randomUUID(); UUID id = UUID.randomUUID();
SearchMatchData data = new SearchMatchData("snippet", List.of(), false, List.of(), List.of(), List.of()); SearchMatchData data = new SearchMatchData("snippet", List.of(), false, List.of(), List.of(), List.of(), null, List.of());
DocumentSearchResult result = DocumentSearchResult.withMatchData(List.of(doc(id)), Map.of(id, data)); DocumentSearchResult result = DocumentSearchResult.withMatchData(List.of(doc(id)), Map.of(id, data));
assertThat(result.matchData()).containsKey(id); assertThat(result.matchData()).containsKey(id);

View File

@@ -10,7 +10,7 @@ class SearchMatchDataTest {
@Test @Test
void transcription_snippet_is_nullable() { void transcription_snippet_is_nullable() {
SearchMatchData data = new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of()); SearchMatchData data = new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of(), null, List.of());
assertThat(data.transcriptionSnippet()).isNull(); assertThat(data.transcriptionSnippet()).isNull();
} }
@@ -35,6 +35,8 @@ class SearchMatchDataTest {
true, true,
List.of(), List.of(),
List.of(), List.of(),
List.of(),
null,
List.of() List.of()
); );
@@ -58,7 +60,9 @@ class SearchMatchDataTest {
false, false,
List.of(), List.of(),
List.of(), List.of(),
List.of(offset) List.of(offset),
null,
List.of()
); );
assertThat(data.snippetOffsets()).containsExactly(offset); assertThat(data.snippetOffsets()).containsExactly(offset);
} }

View File

@@ -1588,9 +1588,9 @@ export interface components {
totalPages?: number; totalPages?: number;
}; };
DocumentSearchResult: { DocumentSearchResult: {
documents?: components["schemas"]["Document"][]; documents: components["schemas"]["Document"][];
/** Format: int64 */ /** Format: int64 */
total?: number; total: number;
matchData: { matchData: {
[key: string]: components["schemas"]["SearchMatchData"]; [key: string]: components["schemas"]["SearchMatchData"];
}; };
@@ -1608,6 +1608,8 @@ export interface components {
matchedReceiverIds: string[]; matchedReceiverIds: string[];
matchedTagIds: string[]; matchedTagIds: string[];
snippetOffsets: components["schemas"]["MatchOffset"][]; snippetOffsets: components["schemas"]["MatchOffset"][];
summarySnippet?: string;
summaryOffsets: components["schemas"]["MatchOffset"][];
}; };
IncompleteDocumentDTO: { IncompleteDocumentDTO: {
/** Format: uuid */ /** Format: uuid */

View File

@@ -143,7 +143,8 @@ describe('DocumentList match snippets and highlights', () => {
senderMatched: false, senderMatched: false,
matchedReceiverIds: [], matchedReceiverIds: [],
matchedTagIds: [], matchedTagIds: [],
snippetOffsets: [] snippetOffsets: [],
summaryOffsets: []
} }
} }
}); });
@@ -169,7 +170,8 @@ describe('DocumentList match snippets and highlights', () => {
senderMatched: false, senderMatched: false,
matchedReceiverIds: [], matchedReceiverIds: [],
matchedTagIds: [], matchedTagIds: [],
snippetOffsets: [] snippetOffsets: [],
summaryOffsets: []
} }
} }
}); });
@@ -192,7 +194,8 @@ describe('DocumentList match snippets and highlights', () => {
senderMatched: false, senderMatched: false,
matchedReceiverIds: [], matchedReceiverIds: [],
matchedTagIds: [], matchedTagIds: [],
snippetOffsets: [] snippetOffsets: [],
summaryOffsets: []
} }
} }
}); });
@@ -213,7 +216,8 @@ describe('DocumentList match snippets and highlights', () => {
senderMatched: false, senderMatched: false,
matchedReceiverIds: [], matchedReceiverIds: [],
matchedTagIds: [], matchedTagIds: [],
snippetOffsets: [{ start: 17, length: 5 }] // "Brief" snippetOffsets: [{ start: 17, length: 5 }], // "Brief"
summaryOffsets: []
} }
} }
}); });
@@ -237,7 +241,8 @@ describe('DocumentList match snippets and highlights', () => {
senderMatched: false, senderMatched: false,
matchedReceiverIds: [], matchedReceiverIds: [],
matchedTagIds: [], matchedTagIds: [],
snippetOffsets: [] snippetOffsets: [],
summaryOffsets: []
} }
} }
}); });
@@ -268,7 +273,8 @@ describe('DocumentList match snippets and highlights', () => {
senderMatched: true, senderMatched: true,
matchedReceiverIds: [], matchedReceiverIds: [],
matchedTagIds: [], matchedTagIds: [],
snippetOffsets: [] snippetOffsets: [],
summaryOffsets: []
} }
} }
}); });
@@ -298,7 +304,8 @@ describe('DocumentList match snippets and highlights', () => {
senderMatched: false, senderMatched: false,
matchedReceiverIds: [], matchedReceiverIds: [],
matchedTagIds: [], matchedTagIds: [],
snippetOffsets: [] snippetOffsets: [],
summaryOffsets: []
} }
} }
}); });
@@ -324,7 +331,8 @@ describe('DocumentList match snippets and highlights', () => {
senderMatched: false, senderMatched: false,
matchedReceiverIds: ['p-1'], matchedReceiverIds: ['p-1'],
matchedTagIds: [], matchedTagIds: [],
snippetOffsets: [] snippetOffsets: [],
summaryOffsets: []
} }
} }
}); });
@@ -353,7 +361,8 @@ describe('DocumentList match snippets and highlights', () => {
senderMatched: false, senderMatched: false,
matchedReceiverIds: [], matchedReceiverIds: [],
matchedTagIds: ['tag-1'], matchedTagIds: ['tag-1'],
snippetOffsets: [] snippetOffsets: [],
summaryOffsets: []
} }
} }
}); });