test(search): add DocumentSearchEnrichmentTest for findEnrichmentData native query

Tests lateral join best-block selection, chr(1)/chr(2) headline delimiters,
sender/receiver/tag match flags, and null cases for missing relations.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-15 17:40:47 +02:00
parent 003d68ed21
commit 8526e6c0a1
2 changed files with 354 additions and 0 deletions

View File

@@ -89,4 +89,55 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
""")
List<UUID> findRankedIdsByFts(@Param("query") String query);
/**
 * Returns match-enrichment data for a set of documents identified by their IDs.
 *
 * <p>All matching uses PostgreSQL full-text search with the {@code german} configuration and
 * {@code websearch_to_tsquery}. Each row contains (in column order):
 * <ol>
 *   <li>UUID — document id</li>
 *   <li>String — title headline with \x01/\x02 delimiters (emitted via {@code chr(1)} /
 *       {@code chr(2)}) around matched terms; {@code HighlightAll=true} is requested so the
 *       title is not cut down to a fragment</li>
 *   <li>String — best-ranked (by {@code ts_rank}) matching transcription block text, or null
 *       when no block of the document matches</li>
 *   <li>Boolean — whether the sender's name matched the query; false when the document has
 *       no sender</li>
 *   <li>String — comma-separated matched receiver UUIDs, or null when no receiver matched</li>
 *   <li>String — comma-separated matched tag UUIDs, or null when no tag matched</li>
 * </ol>
 *
 * <p>Person names are built as {@code first_name || ' ' || last_name} with both parts wrapped
 * in {@code COALESCE(..., '')}: in PostgreSQL a NULL operand makes the whole concatenation
 * NULL, which would silently prevent a person with a missing name part from ever matching.
 * Sender and receiver matching apply the same rule.
 *
 * <p>Short-circuit before calling this method when {@code ids} is empty or {@code query} is blank.
 *
 * @param ids   IDs of the documents to enrich; must not be empty (see above)
 * @param query the user's search string, interpreted by {@code websearch_to_tsquery}; must not
 *              be blank (see above)
 * @return one {@code Object[]} per document found for {@code ids}, with the six columns
 *         described above in that order
 */
@Query(nativeQuery = true, value = """
    SELECT
        d.id,
        ts_headline('german', d.title, websearch_to_tsquery('german', :query),
            'StartSel=' || chr(1) || ',StopSel=' || chr(2) || ',HighlightAll=true')
            AS title_headline,
        best_block.text AS transcription_snippet,
        (s.id IS NOT NULL AND
            to_tsvector('german', COALESCE(s.first_name, '') || ' ' || COALESCE(s.last_name, ''))
            @@ websearch_to_tsquery('german', :query))
            AS sender_matched,
        (SELECT string_agg(r.id::text, ',')
            FROM document_receivers dr
            JOIN persons r ON r.id = dr.person_id
            WHERE dr.document_id = d.id
              AND to_tsvector('german', COALESCE(r.first_name, '') || ' ' || COALESCE(r.last_name, ''))
                  @@ websearch_to_tsquery('german', :query)
        ) AS matched_receiver_ids,
        (SELECT string_agg(t.id::text, ',')
            FROM document_tags dt
            JOIN tag t ON t.id = dt.tag_id
            WHERE dt.document_id = d.id
              AND to_tsvector('german', t.name) @@ websearch_to_tsquery('german', :query)
        ) AS matched_tag_ids
    FROM documents d
    LEFT JOIN persons s ON s.id = d.sender_id
    LEFT JOIN LATERAL (
        SELECT tb.text
        FROM transcription_blocks tb
        WHERE tb.document_id = d.id
          AND to_tsvector('german', tb.text) @@ websearch_to_tsquery('german', :query)
        ORDER BY ts_rank(to_tsvector('german', tb.text), websearch_to_tsquery('german', :query)) DESC
        LIMIT 1
    ) best_block ON true
    WHERE d.id IN :ids
""")
List<Object[]> findEnrichmentData(@Param("ids") Collection<UUID> ids, @Param("query") String query);
}