feat(fts): push FTS pagination into SQL via CTE window function
Pure-text RELEVANCE queries now use findFtsPageRaw (CTE + COUNT(*) OVER()) instead of loading all matching IDs into memory and sorting in-process. Non-text paths (filters active, DATE sort) still use the in-memory path. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -100,9 +100,46 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
|
||||
ORDER BY ts_rank(d.search_vector, q.pq) DESC,
|
||||
d.meta_date DESC NULLS LAST
|
||||
""")
|
||||
// Unpaged path — use findFtsPageRaw for paginated search results
|
||||
// Unpaged path — for bulk-edit "select all" and density chart
|
||||
List<UUID> findAllMatchingIdsByFts(@Param("query") String query);
|
||||
|
||||
/**
|
||||
* Returns one page of FTS-ranked document IDs with the total match count.
|
||||
*
|
||||
* <p>Each row contains (in column order):
|
||||
* <ol>
|
||||
* <li>UUID — document id</li>
|
||||
* <li>double — ts_rank score</li>
|
||||
* <li>long — COUNT(*) OVER () — full match count, not page count</li>
|
||||
* </ol>
|
||||
*
|
||||
* <p>Returns an empty list when the query matches no documents (including
|
||||
* stopword-only queries where websearch_to_tsquery returns an empty tsquery).
|
||||
* Use findAllMatchingIdsByFts for the unpaged bulk-edit path.
|
||||
*/
|
||||
@Query(nativeQuery = true, value = """
|
||||
WITH q AS (
|
||||
SELECT CASE WHEN websearch_to_tsquery('german', :query)::text <> ''
|
||||
THEN to_tsquery('simple', regexp_replace(
|
||||
websearch_to_tsquery('german', :query)::text,
|
||||
'''([^'']+)''',
|
||||
'''\\1'':*',
|
||||
'g'))
|
||||
END AS pq
|
||||
), matches AS (
|
||||
SELECT d.id, ts_rank(d.search_vector, q.pq) AS rank
|
||||
FROM documents d, q
|
||||
WHERE d.search_vector @@ q.pq
|
||||
)
|
||||
SELECT id, rank, COUNT(*) OVER () AS total
|
||||
FROM matches
|
||||
ORDER BY rank DESC, id
|
||||
OFFSET :offset LIMIT :limit
|
||||
""")
|
||||
List<Object[]> findFtsPageRaw(@Param("query") String query,
|
||||
@Param("offset") int offset,
|
||||
@Param("limit") int limit);
|
||||
|
||||
/**
|
||||
* Returns match-enrichment data for a set of documents identified by their IDs.
|
||||
* Each row contains (in column order):
|
||||
|
||||
@@ -645,8 +645,16 @@ public class DocumentService {
|
||||
// 1. Allgemeine Suche (für das Suchfeld im Frontend)
|
||||
public DocumentSearchResult searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir, TagOperator tagOperator, Pageable pageable) {
|
||||
boolean hasText = StringUtils.hasText(text);
|
||||
List<UUID> rankedIds = null;
|
||||
|
||||
// Pure-text RELEVANCE: push pagination into SQL — skip findAllMatchingIdsByFts entirely (ADR-008).
|
||||
boolean pureTextRelevance = hasText && (sort == null || sort == DocumentSort.RELEVANCE)
|
||||
&& from == null && to == null && sender == null && receiver == null
|
||||
&& (tags == null || tags.isEmpty()) && (tagQ == null || tagQ.isBlank()) && status == null;
|
||||
if (pureTextRelevance) {
|
||||
return relevanceSortedPageFromSql(text, pageable);
|
||||
}
|
||||
|
||||
List<UUID> rankedIds = null;
|
||||
if (hasText) {
|
||||
rankedIds = documentRepository.findAllMatchingIdsByFts(text);
|
||||
if (rankedIds.isEmpty()) return DocumentSearchResult.of(List.of());
|
||||
@@ -655,29 +663,28 @@ public class DocumentService {
|
||||
Specification<Document> spec = buildSearchSpec(
|
||||
hasText, rankedIds, from, to, sender, receiver, tags, tagQ, status, tagOperator);
|
||||
|
||||
// SENDER, RECEIVER and RELEVANCE sorts load the full match set and slice in memory.
|
||||
// SENDER and RECEIVER sorts load the full match set and slice in-memory.
|
||||
// JPA's Sort.by("sender.lastName") generates an INNER JOIN that silently drops
|
||||
// documents with null sender/receivers; RELEVANCE maps a DB order to an external
|
||||
// rank list. Cost scales linearly with match count — acceptable while documents
|
||||
// stays under ~10k rows. Past that, replace with SQL-level LEFT JOIN sort.
|
||||
// documents with null sender/receivers. Cost scales with match count —
|
||||
// acceptable while documents stays under ~10k rows. (ADR-008)
|
||||
if (sort == DocumentSort.RECEIVER) {
|
||||
// In-memory sort on page slice (≤ page size rows) — acceptable
|
||||
List<Document> sorted = sortByFirstReceiver(documentRepository.findAll(spec), dir);
|
||||
return buildResultPaged(pageSlice(sorted, pageable), text, pageable, sorted.size());
|
||||
}
|
||||
if (sort == DocumentSort.SENDER) {
|
||||
// In-memory sort on page slice (≤ page size rows) — acceptable
|
||||
List<Document> sorted = sortBySender(documentRepository.findAll(spec), dir);
|
||||
return buildResultPaged(pageSlice(sorted, pageable), text, pageable, sorted.size());
|
||||
}
|
||||
|
||||
// RELEVANCE: default when text present and no explicit sort given
|
||||
// RELEVANCE with active filters: load filtered subset and sort in-memory by rank.
|
||||
boolean useRankOrder = hasText && (sort == null || sort == DocumentSort.RELEVANCE);
|
||||
if (useRankOrder) {
|
||||
List<Document> results = documentRepository.findAll(spec);
|
||||
Map<UUID, Integer> rankMap = new HashMap<>();
|
||||
for (int i = 0; i < rankedIds.size(); i++) rankMap.put(rankedIds.get(i), i);
|
||||
List<Document> sorted = results.stream()
|
||||
.sorted(Comparator.comparingInt(
|
||||
doc -> rankMap.getOrDefault(doc.getId(), Integer.MAX_VALUE)))
|
||||
List<Document> sorted = documentRepository.findAll(spec).stream()
|
||||
.sorted(Comparator.comparingInt(doc -> rankMap.getOrDefault(doc.getId(), Integer.MAX_VALUE)))
|
||||
.toList();
|
||||
return buildResultPaged(pageSlice(sorted, pageable), text, pageable, sorted.size());
|
||||
}
|
||||
@@ -688,6 +695,29 @@ public class DocumentService {
|
||||
return buildResultPaged(page.getContent(), text, pageable, page.getTotalElements());
|
||||
}
|
||||
|
||||
/**
|
||||
* Pure-text RELEVANCE path — pagination and ts_rank ordering pushed into SQL.
|
||||
* Called when no non-text filters are active (ADR-008).
|
||||
*/
|
||||
private DocumentSearchResult relevanceSortedPageFromSql(String text, Pageable pageable) {
|
||||
int offset = (int) pageable.getOffset();
|
||||
int limit = pageable.getPageSize();
|
||||
FtsPage ftsPage = toFtsPage(documentRepository.findFtsPageRaw(text, offset, limit));
|
||||
if (ftsPage.hits().isEmpty()) return DocumentSearchResult.of(List.of());
|
||||
|
||||
// Preserve ts_rank order from SQL across the JPA findAllById call.
|
||||
Map<UUID, Integer> rankMap = new HashMap<>();
|
||||
List<UUID> pageIds = new ArrayList<>();
|
||||
for (int i = 0; i < ftsPage.hits().size(); i++) {
|
||||
rankMap.put(ftsPage.hits().get(i).id(), i);
|
||||
pageIds.add(ftsPage.hits().get(i).id());
|
||||
}
|
||||
List<Document> docs = documentRepository.findAllById(pageIds).stream()
|
||||
.sorted(Comparator.comparingInt(d -> rankMap.getOrDefault(d.getId(), Integer.MAX_VALUE)))
|
||||
.toList();
|
||||
return buildResultPaged(docs, text, pageable, ftsPage.total());
|
||||
}
|
||||
|
||||
private static <T> List<T> pageSlice(List<T> sorted, Pageable pageable) {
|
||||
int from = Math.min((int) pageable.getOffset(), sorted.size());
|
||||
int to = Math.min(from + pageable.getPageSize(), sorted.size());
|
||||
@@ -1013,6 +1043,28 @@ public class DocumentService {
|
||||
return result;
|
||||
}
|
||||
|
||||
private static final int COL_ID = 0;
|
||||
private static final int COL_RANK = 1;
|
||||
private static final int COL_TOTAL = 2;
|
||||
|
||||
/**
|
||||
* Maps raw Object[] rows from {@link DocumentRepository#findFtsPageRaw} to an
|
||||
* {@link FtsPage}. Uses pattern-matching UUID cast to guard against driver-level
|
||||
* type variance (some JDBC drivers return UUID as String).
|
||||
*/
|
||||
private static FtsPage toFtsPage(List<Object[]> rows) {
|
||||
if (rows.isEmpty()) return new FtsPage(List.of(), 0);
|
||||
long total = ((Number) rows.get(0)[COL_TOTAL]).longValue();
|
||||
List<FtsHit> hits = rows.stream()
|
||||
.map(r -> {
|
||||
UUID id = r[COL_ID] instanceof UUID u ? u : UUID.fromString(r[COL_ID].toString());
|
||||
double rank = ((Number) r[COL_RANK]).doubleValue();
|
||||
return new FtsHit(id, rank);
|
||||
})
|
||||
.toList();
|
||||
return new FtsPage(hits, total);
|
||||
}
|
||||
|
||||
/** Clean text + highlight offsets parsed from a {@code ts_headline} sentinel-delimited string. */
|
||||
public record ParsedHighlight(String cleanText, List<MatchOffset> offsets) {}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user