From b017da22c32f6df69c6bf7b0b42f7142e98ed85d Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Sat, 9 May 2026 14:20:25 +0200
Subject: [PATCH] feat(fts): push FTS pagination into SQL via CTE window
 function

Pure-text RELEVANCE queries now use findFtsPageRaw (CTE + COUNT(*) OVER())
instead of loading all matching IDs into memory and sorting in-process.
Text queries with active filters or a non-RELEVANCE sort still load all
matching IDs via findAllMatchingIdsByFts.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../document/DocumentRepository.java          | 39 +++++++++-
 .../document/DocumentService.java             | 72 ++++++++++++++++---
 2 files changed, 100 insertions(+), 11 deletions(-)
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentRepository.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentRepository.java
index 66ed5d4a..e907e5f2 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentRepository.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentRepository.java
@@ -100,9 +100,46 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
             ORDER BY ts_rank(d.search_vector, q.pq) DESC,
                      d.meta_date DESC NULLS LAST
             """)
-    // Unpaged path — use findFtsPageRaw for paginated search results
+    // Unpaged path — for bulk-edit "select all", density chart, and filtered/sorted text search
     List<UUID> findAllMatchingIdsByFts(@Param("query") String query);
 
+    /**
+     * Returns one page of FTS-ranked document IDs with the total match count.
+     *
+     * <p>Each row contains (in column order):
+     * <ol>
+     *   <li>UUID   — document id</li>
+     *   <li>double — ts_rank score</li>
+     *   <li>long   — COUNT(*) OVER () — full match count, not page count</li>
+     * </ol>
+     *
+     * <p>Returns an empty list when the query matches no documents (including
+     * stopword-only queries where websearch_to_tsquery returns an empty tsquery) or when
+     * offset lies past the last match (no total then). Unpaged path: findAllMatchingIdsByFts.
+     */
+    @Query(nativeQuery = true, value = """
+            WITH q AS (
+                SELECT CASE WHEN websearch_to_tsquery('german', :query)::text <> ''
+                            THEN to_tsquery('simple', regexp_replace(
+                                     websearch_to_tsquery('german', :query)::text,
+                                     '''([^'']+)''',
+                                     '''\\1'':*',
+                                     'g'))
+                       END AS pq
+            ), matches AS (
+                SELECT d.id, ts_rank(d.search_vector, q.pq) AS rank
+                FROM documents d, q
+                WHERE d.search_vector @@ q.pq
+            )
+            SELECT id, rank, COUNT(*) OVER () AS total
+            FROM matches
+            ORDER BY rank DESC, id
+            OFFSET :offset LIMIT :limit
+            """)
+    List<Object[]> findFtsPageRaw(@Param("query") String query,
+                                  @Param("offset") int offset,
+                                  @Param("limit") int limit);
+
     /**
      * Returns match-enrichment data for a set of documents identified by their IDs.
      * Each row contains (in column order):
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java
index cc126b37..0a056b7b 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java
@@ -645,8 +645,16 @@ public class DocumentService {
     // 1. Allgemeine Suche (für das Suchfeld im Frontend)
     public DocumentSearchResult searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir, TagOperator tagOperator, Pageable pageable) {
         boolean hasText = StringUtils.hasText(text);
-        List<UUID> rankedIds = null;
 
+        // Pure-text RELEVANCE: push pagination into SQL — skip findAllMatchingIdsByFts entirely (ADR-008).
+        boolean pureTextRelevance = hasText && (sort == null || sort == DocumentSort.RELEVANCE)
+                && from == null && to == null && sender == null && receiver == null
+                && (tags == null || tags.isEmpty()) && (tagQ == null || tagQ.isBlank()) && status == null;
+        if (pureTextRelevance) {
+            return relevanceSortedPageFromSql(text, pageable);
+        }
+
+        List<UUID> rankedIds = null;
         if (hasText) {
             rankedIds = documentRepository.findAllMatchingIdsByFts(text);
             if (rankedIds.isEmpty()) return DocumentSearchResult.of(List.of());
@@ -655,29 +663,28 @@ public class DocumentService {
         Specification<Document> spec = buildSearchSpec(
                 hasText, rankedIds, from, to, sender, receiver, tags, tagQ, status, tagOperator);
 
-        // SENDER, RECEIVER and RELEVANCE sorts load the full match set and slice in memory.
+        // SENDER and RECEIVER sorts load the full match set and slice in-memory.
         // JPA's Sort.by("sender.lastName") generates an INNER JOIN that silently drops
-        // documents with null sender/receivers; RELEVANCE maps a DB order to an external
-        // rank list. Cost scales linearly with match count — acceptable while documents
-        // stays under ~10k rows. Past that, replace with SQL-level LEFT JOIN sort.
+        // documents with null sender/receivers. Cost scales with match count —
+        // acceptable while documents stays under ~10k rows. (ADR-008)
         if (sort == DocumentSort.RECEIVER) {
+            // Sorts the full match set in memory, then slices out the requested page
             List<Document> sorted = sortByFirstReceiver(documentRepository.findAll(spec), dir);
             return buildResultPaged(pageSlice(sorted, pageable), text, pageable, sorted.size());
         }
         if (sort == DocumentSort.SENDER) {
+            // Sorts the full match set in memory, then slices out the requested page
             List<Document> sorted = sortBySender(documentRepository.findAll(spec), dir);
             return buildResultPaged(pageSlice(sorted, pageable), text, pageable, sorted.size());
         }
 
-        // RELEVANCE: default when text present and no explicit sort given
+        // RELEVANCE with active filters: load filtered subset and sort in-memory by rank.
         boolean useRankOrder = hasText && (sort == null || sort == DocumentSort.RELEVANCE);
         if (useRankOrder) {
-            List<Document> results = documentRepository.findAll(spec);
             Map<UUID, Integer> rankMap = new HashMap<>();
             for (int i = 0; i < rankedIds.size(); i++) rankMap.put(rankedIds.get(i), i);
-            List<Document> sorted = results.stream()
-                    .sorted(Comparator.comparingInt(
-                            doc -> rankMap.getOrDefault(doc.getId(), Integer.MAX_VALUE)))
+            List<Document> sorted = documentRepository.findAll(spec).stream()
+                    .sorted(Comparator.comparingInt(doc -> rankMap.getOrDefault(doc.getId(), Integer.MAX_VALUE)))
                     .toList();
             return buildResultPaged(pageSlice(sorted, pageable), text, pageable, sorted.size());
         }
@@ -688,6 +695,29 @@ public class DocumentService {
         return buildResultPaged(page.getContent(), text, pageable, page.getTotalElements());
     }
 
+    /**
+     * Pure-text RELEVANCE path — pagination and ts_rank ordering pushed into SQL.
+     * Called when no non-text filters are active (ADR-008).
+     */
+    private DocumentSearchResult relevanceSortedPageFromSql(String text, Pageable pageable) {
+        int offset = (int) pageable.getOffset();
+        int limit = pageable.getPageSize();
+        FtsPage ftsPage = toFtsPage(documentRepository.findFtsPageRaw(text, offset, limit));
+        if (ftsPage.hits().isEmpty()) return DocumentSearchResult.of(List.of());
+
+        // Preserve ts_rank order from SQL across the JPA findAllById call.
+        Map<UUID, Integer> rankMap = new HashMap<>();
+        List<UUID> pageIds = new ArrayList<>();
+        for (int i = 0; i < ftsPage.hits().size(); i++) {
+            rankMap.put(ftsPage.hits().get(i).id(), i);
+            pageIds.add(ftsPage.hits().get(i).id());
+        }
+        List<Document> docs = documentRepository.findAllById(pageIds).stream()
+                .sorted(Comparator.comparingInt(d -> rankMap.getOrDefault(d.getId(), Integer.MAX_VALUE)))
+                .toList();
+        return buildResultPaged(docs, text, pageable, ftsPage.total());
+    }
+
     private static <T> List<T> pageSlice(List<T> sorted, Pageable pageable) {
         int from = Math.min((int) pageable.getOffset(), sorted.size());
         int to = Math.min(from + pageable.getPageSize(), sorted.size());
@@ -1013,6 +1043,28 @@ public class DocumentService {
         return result;
     }
 
+    private static final int COL_ID = 0;
+    private static final int COL_RANK = 1;
+    private static final int COL_TOTAL = 2;
+
+    /**
+     * Maps raw Object[] rows from {@link DocumentRepository#findFtsPageRaw} to an
+     * {@link FtsPage}. Uses pattern-matching UUID cast to guard against driver-level
+     * type variance (some JDBC drivers return UUID as String).
+     */
+    private static FtsPage toFtsPage(List<Object[]> rows) {
+        if (rows.isEmpty()) return new FtsPage(List.of(), 0);
+        long total = ((Number) rows.get(0)[COL_TOTAL]).longValue();
+        List<FtsHit> hits = rows.stream()
+                .map(r -> {
+                    UUID id = r[COL_ID] instanceof UUID u ? u : UUID.fromString(r[COL_ID].toString());
+                    double rank = ((Number) r[COL_RANK]).doubleValue();
+                    return new FtsHit(id, rank);
+                })
+                .toList();
+        return new FtsPage(hits, total);
+    }
+
     /** Clean text + highlight offsets parsed from a {@code ts_headline} sentinel-delimited string. */
     public record ParsedHighlight(String cleanText, List<MatchOffset> offsets) {}