feat(search): surface summary snippet when summary matched the query
Add a summary_snippet column to findEnrichmentData using ts_headline on documents.summary, only when the summary's tsvector matches the query. Expose it via SearchMatchData.summarySnippet / summaryOffsets and render a "Zusammenfassung" / "Summary" / "Resumen" labelled row in the document list — identical treatment to the transcription snippet row. Fixes the case where a document appeared in search results with no visible match explanation (e.g. searching "frucht" found a document whose summary mentioned "Früchte").

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -7,8 +7,8 @@ import java.util.UUID;
|
||||
|
||||
/**
|
||||
* Match signals for a single document in a full-text search result.
|
||||
* All fields are non-null except {@code transcriptionSnippet}, which is null
|
||||
* when no transcription block matched the query.
|
||||
* All fields are non-null except {@code transcriptionSnippet} and {@code summarySnippet},
|
||||
* which are null when the respective field did not match the query.
|
||||
*/
|
||||
public record SearchMatchData(
|
||||
/**
|
||||
@@ -46,10 +46,22 @@ public record SearchMatchData(
|
||||
* Empty when no transcription block matched or the snippet has no highlights.
|
||||
*/
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<MatchOffset> snippetOffsets
|
||||
List<MatchOffset> snippetOffsets,
|
||||
|
||||
/**
|
||||
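* Summary excerpt containing the matched terms, or null if the summary did not match the query.
* The positions of the highlighted terms are carried separately in {@code summaryOffsets}.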
|
||||
*/
|
||||
String summarySnippet,
|
||||
|
||||
/**
|
||||
* Character offsets of highlighted terms within the summary snippet.
|
||||
* Empty when the summary did not match or has no highlights.
|
||||
*/
|
||||
@Schema(requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
List<MatchOffset> summaryOffsets
|
||||
) {
|
||||
/** Canonical "no match data" value for a single document. */
|
||||
public static SearchMatchData empty() {
|
||||
return new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of());
|
||||
return new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of(), null, List.of());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -607,14 +607,18 @@ public class DocumentService {
|
||||
Boolean senderMatched = (Boolean) row[3];
|
||||
String receiverIdsStr = (String) row[4];
|
||||
String tagIdsStr = (String) row[5];
|
||||
String summaryHeadline = (String) row[6];
|
||||
ParsedHighlight snippet = parseHighlight(snippetHeadline);
|
||||
ParsedHighlight summary = parseHighlight(summaryHeadline);
|
||||
result.put(docId, new SearchMatchData(
|
||||
snippet != null ? snippet.cleanText() : null,
|
||||
parseTitleOffsets(titleHeadline),
|
||||
senderMatched != null && senderMatched,
|
||||
parseUUIDs(receiverIdsStr),
|
||||
parseUUIDs(tagIdsStr),
|
||||
snippet != null ? snippet.offsets() : List.of()
|
||||
snippet != null ? snippet.offsets() : List.of(),
|
||||
summary != null ? summary.cleanText() : null,
|
||||
summary != null ? summary.offsets() : List.of()
|
||||
));
|
||||
}
|
||||
return result;
|
||||
|
||||
@@ -125,7 +125,7 @@ class DocumentControllerTest {
|
||||
.status(DocumentStatus.UPLOADED)
|
||||
.build();
|
||||
var matchData = new org.raddatz.familienarchiv.dto.SearchMatchData(
|
||||
"Er schrieb einen langen Brief", List.of(), false, List.of(), List.of(), List.of());
|
||||
"Er schrieb einen langen Brief", List.of(), false, List.of(), List.of(), List.of(), null, List.of());
|
||||
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
|
||||
.thenReturn(DocumentSearchResult.withMatchData(List.of(doc), Map.of(docId, matchData)));
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ class DocumentSearchResultTest {
|
||||
@Test
|
||||
void withMatchData_exposes_match_data_map() {
|
||||
UUID id = UUID.randomUUID();
|
||||
SearchMatchData data = new SearchMatchData("snippet", List.of(), false, List.of(), List.of(), List.of());
|
||||
SearchMatchData data = new SearchMatchData("snippet", List.of(), false, List.of(), List.of(), List.of(), null, List.of());
|
||||
DocumentSearchResult result = DocumentSearchResult.withMatchData(List.of(doc(id)), Map.of(id, data));
|
||||
|
||||
assertThat(result.matchData()).containsKey(id);
|
||||
|
||||
@@ -10,7 +10,7 @@ class SearchMatchDataTest {
|
||||
|
||||
@Test
|
||||
void transcription_snippet_is_nullable() {
|
||||
SearchMatchData data = new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of());
|
||||
SearchMatchData data = new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of(), null, List.of());
|
||||
|
||||
assertThat(data.transcriptionSnippet()).isNull();
|
||||
}
|
||||
@@ -35,6 +35,8 @@ class SearchMatchDataTest {
|
||||
true,
|
||||
List.of(),
|
||||
List.of(),
|
||||
List.of(),
|
||||
null,
|
||||
List.of()
|
||||
);
|
||||
|
||||
@@ -58,7 +60,9 @@ class SearchMatchDataTest {
|
||||
false,
|
||||
List.of(),
|
||||
List.of(),
|
||||
List.of(offset)
|
||||
List.of(offset),
|
||||
null,
|
||||
List.of()
|
||||
);
|
||||
assertThat(data.snippetOffsets()).containsExactly(offset);
|
||||
}
|
||||
|
||||
@@ -1588,9 +1588,9 @@ export interface components {
|
||||
totalPages?: number;
|
||||
};
|
||||
DocumentSearchResult: {
|
||||
documents?: components["schemas"]["Document"][];
|
||||
documents: components["schemas"]["Document"][];
|
||||
/** Format: int64 */
|
||||
total?: number;
|
||||
total: number;
|
||||
matchData: {
|
||||
[key: string]: components["schemas"]["SearchMatchData"];
|
||||
};
|
||||
@@ -1608,6 +1608,8 @@ export interface components {
|
||||
matchedReceiverIds: string[];
|
||||
matchedTagIds: string[];
|
||||
snippetOffsets: components["schemas"]["MatchOffset"][];
|
||||
summarySnippet?: string;
|
||||
summaryOffsets: components["schemas"]["MatchOffset"][];
|
||||
};
|
||||
IncompleteDocumentDTO: {
|
||||
/** Format: uuid */
|
||||
|
||||
@@ -143,7 +143,8 @@ describe('DocumentList – match snippets and highlights', () => {
|
||||
senderMatched: false,
|
||||
matchedReceiverIds: [],
|
||||
matchedTagIds: [],
|
||||
snippetOffsets: []
|
||||
snippetOffsets: [],
|
||||
summaryOffsets: []
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -169,7 +170,8 @@ describe('DocumentList – match snippets and highlights', () => {
|
||||
senderMatched: false,
|
||||
matchedReceiverIds: [],
|
||||
matchedTagIds: [],
|
||||
snippetOffsets: []
|
||||
snippetOffsets: [],
|
||||
summaryOffsets: []
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -192,7 +194,8 @@ describe('DocumentList – match snippets and highlights', () => {
|
||||
senderMatched: false,
|
||||
matchedReceiverIds: [],
|
||||
matchedTagIds: [],
|
||||
snippetOffsets: []
|
||||
snippetOffsets: [],
|
||||
summaryOffsets: []
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -213,7 +216,8 @@ describe('DocumentList – match snippets and highlights', () => {
|
||||
senderMatched: false,
|
||||
matchedReceiverIds: [],
|
||||
matchedTagIds: [],
|
||||
snippetOffsets: [{ start: 17, length: 5 }] // "Brief"
|
||||
snippetOffsets: [{ start: 17, length: 5 }], // "Brief"
|
||||
summaryOffsets: []
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -237,7 +241,8 @@ describe('DocumentList – match snippets and highlights', () => {
|
||||
senderMatched: false,
|
||||
matchedReceiverIds: [],
|
||||
matchedTagIds: [],
|
||||
snippetOffsets: []
|
||||
snippetOffsets: [],
|
||||
summaryOffsets: []
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -268,7 +273,8 @@ describe('DocumentList – match snippets and highlights', () => {
|
||||
senderMatched: true,
|
||||
matchedReceiverIds: [],
|
||||
matchedTagIds: [],
|
||||
snippetOffsets: []
|
||||
snippetOffsets: [],
|
||||
summaryOffsets: []
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -298,7 +304,8 @@ describe('DocumentList – match snippets and highlights', () => {
|
||||
senderMatched: false,
|
||||
matchedReceiverIds: [],
|
||||
matchedTagIds: [],
|
||||
snippetOffsets: []
|
||||
snippetOffsets: [],
|
||||
summaryOffsets: []
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -324,7 +331,8 @@ describe('DocumentList – match snippets and highlights', () => {
|
||||
senderMatched: false,
|
||||
matchedReceiverIds: ['p-1'],
|
||||
matchedTagIds: [],
|
||||
snippetOffsets: []
|
||||
snippetOffsets: [],
|
||||
summaryOffsets: []
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -353,7 +361,8 @@ describe('DocumentList – match snippets and highlights', () => {
|
||||
senderMatched: false,
|
||||
matchedReceiverIds: [],
|
||||
matchedTagIds: ['tag-1'],
|
||||
snippetOffsets: []
|
||||
snippetOffsets: [],
|
||||
summaryOffsets: []
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user