feat(search): surface summary snippet when summary matched the query
Some checks failed
CI / Unit & Component Tests (push) Failing after 2m33s
CI / Backend Unit Tests (push) Failing after 2m44s

Add a summary_snippet column to findEnrichmentData, computed with
ts_headline on documents.summary and populated only when the summary's
tsvector matches the query. Expose it via SearchMatchData.summarySnippet /
summaryOffsets and render it in the document list as a row labelled
"Zusammenfassung" / "Summary" / "Resumen", treated identically to the
transcription snippet row.

Fixes the case where a document appeared in search results with no
visible match explanation (e.g. searching "frucht" found a document
whose summary mentioned "Früchte").

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit was merged in pull request #242.
This commit is contained in:
Marcel
2026-04-15 21:34:10 +02:00
committed by marcel
parent eb18d4f568
commit d7b2357834
7 changed files with 51 additions and 20 deletions

View File

@@ -7,8 +7,8 @@ import java.util.UUID;
/**
* Match signals for a single document in a full-text search result.
* All fields are non-null except {@code transcriptionSnippet}, which is null
* when no transcription block matched the query.
* All fields are non-null except {@code transcriptionSnippet} and {@code summarySnippet},
* which are null when the respective field did not match the query.
*/
public record SearchMatchData(
/**
@@ -46,10 +46,22 @@ public record SearchMatchData(
* Empty when no transcription block matched or the snippet has no highlights.
*/
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
List<MatchOffset> snippetOffsets
List<MatchOffset> snippetOffsets,
/**
* Highlighted summary excerpt, or null if the summary did not match the query.
*/
String summarySnippet,
/**
* Character offsets of highlighted terms within the summary snippet.
* Empty when the summary did not match or has no highlights.
*/
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
List<MatchOffset> summaryOffsets
) {
/** Canonical "no match data" value for a single document. */
public static SearchMatchData empty() {
return new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of());
return new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of(), null, List.of());
}
}

View File

@@ -607,14 +607,18 @@ public class DocumentService {
Boolean senderMatched = (Boolean) row[3];
String receiverIdsStr = (String) row[4];
String tagIdsStr = (String) row[5];
String summaryHeadline = (String) row[6];
ParsedHighlight snippet = parseHighlight(snippetHeadline);
ParsedHighlight summary = parseHighlight(summaryHeadline);
result.put(docId, new SearchMatchData(
snippet != null ? snippet.cleanText() : null,
parseTitleOffsets(titleHeadline),
senderMatched != null && senderMatched,
parseUUIDs(receiverIdsStr),
parseUUIDs(tagIdsStr),
snippet != null ? snippet.offsets() : List.of()
snippet != null ? snippet.offsets() : List.of(),
summary != null ? summary.cleanText() : null,
summary != null ? summary.offsets() : List.of()
));
}
return result;

View File

@@ -125,7 +125,7 @@ class DocumentControllerTest {
.status(DocumentStatus.UPLOADED)
.build();
var matchData = new org.raddatz.familienarchiv.dto.SearchMatchData(
"Er schrieb einen langen Brief", List.of(), false, List.of(), List.of(), List.of());
"Er schrieb einen langen Brief", List.of(), false, List.of(), List.of(), List.of(), null, List.of());
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
.thenReturn(DocumentSearchResult.withMatchData(List.of(doc), Map.of(docId, matchData)));

View File

@@ -36,7 +36,7 @@ class DocumentSearchResultTest {
@Test
void withMatchData_exposes_match_data_map() {
UUID id = UUID.randomUUID();
SearchMatchData data = new SearchMatchData("snippet", List.of(), false, List.of(), List.of(), List.of());
SearchMatchData data = new SearchMatchData("snippet", List.of(), false, List.of(), List.of(), List.of(), null, List.of());
DocumentSearchResult result = DocumentSearchResult.withMatchData(List.of(doc(id)), Map.of(id, data));
assertThat(result.matchData()).containsKey(id);

View File

@@ -10,7 +10,7 @@ class SearchMatchDataTest {
@Test
void transcription_snippet_is_nullable() {
SearchMatchData data = new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of());
SearchMatchData data = new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of(), null, List.of());
assertThat(data.transcriptionSnippet()).isNull();
}
@@ -35,6 +35,8 @@ class SearchMatchDataTest {
true,
List.of(),
List.of(),
List.of(),
null,
List.of()
);
@@ -58,7 +60,9 @@ class SearchMatchDataTest {
false,
List.of(),
List.of(),
List.of(offset)
List.of(offset),
null,
List.of()
);
assertThat(data.snippetOffsets()).containsExactly(offset);
}

View File

@@ -1588,9 +1588,9 @@ export interface components {
totalPages?: number;
};
DocumentSearchResult: {
documents?: components["schemas"]["Document"][];
documents: components["schemas"]["Document"][];
/** Format: int64 */
total?: number;
total: number;
matchData: {
[key: string]: components["schemas"]["SearchMatchData"];
};
@@ -1608,6 +1608,8 @@ export interface components {
matchedReceiverIds: string[];
matchedTagIds: string[];
snippetOffsets: components["schemas"]["MatchOffset"][];
summarySnippet?: string;
summaryOffsets: components["schemas"]["MatchOffset"][];
};
IncompleteDocumentDTO: {
/** Format: uuid */

View File

@@ -143,7 +143,8 @@ describe('DocumentList match snippets and highlights', () => {
senderMatched: false,
matchedReceiverIds: [],
matchedTagIds: [],
snippetOffsets: []
snippetOffsets: [],
summaryOffsets: []
}
}
});
@@ -169,7 +170,8 @@ describe('DocumentList match snippets and highlights', () => {
senderMatched: false,
matchedReceiverIds: [],
matchedTagIds: [],
snippetOffsets: []
snippetOffsets: [],
summaryOffsets: []
}
}
});
@@ -192,7 +194,8 @@ describe('DocumentList match snippets and highlights', () => {
senderMatched: false,
matchedReceiverIds: [],
matchedTagIds: [],
snippetOffsets: []
snippetOffsets: [],
summaryOffsets: []
}
}
});
@@ -213,7 +216,8 @@ describe('DocumentList match snippets and highlights', () => {
senderMatched: false,
matchedReceiverIds: [],
matchedTagIds: [],
snippetOffsets: [{ start: 17, length: 5 }] // "Brief"
snippetOffsets: [{ start: 17, length: 5 }], // "Brief"
summaryOffsets: []
}
}
});
@@ -237,7 +241,8 @@ describe('DocumentList match snippets and highlights', () => {
senderMatched: false,
matchedReceiverIds: [],
matchedTagIds: [],
snippetOffsets: []
snippetOffsets: [],
summaryOffsets: []
}
}
});
@@ -268,7 +273,8 @@ describe('DocumentList match snippets and highlights', () => {
senderMatched: true,
matchedReceiverIds: [],
matchedTagIds: [],
snippetOffsets: []
snippetOffsets: [],
summaryOffsets: []
}
}
});
@@ -298,7 +304,8 @@ describe('DocumentList match snippets and highlights', () => {
senderMatched: false,
matchedReceiverIds: [],
matchedTagIds: [],
snippetOffsets: []
snippetOffsets: [],
summaryOffsets: []
}
}
});
@@ -324,7 +331,8 @@ describe('DocumentList match snippets and highlights', () => {
senderMatched: false,
matchedReceiverIds: ['p-1'],
matchedTagIds: [],
snippetOffsets: []
snippetOffsets: [],
summaryOffsets: []
}
}
});
@@ -353,7 +361,8 @@ describe('DocumentList match snippets and highlights', () => {
senderMatched: false,
matchedReceiverIds: [],
matchedTagIds: ['tag-1'],
snippetOffsets: []
snippetOffsets: [],
summaryOffsets: []
}
}
});