feat(search): DocumentService.searchDocuments takes Pageable and slices

Fast path (DATE/TITLE/UPLOAD_DATE) pushes sort + paging into the DB via findAll(Specification, PageRequest) and enriches only the returned slice — 30× cheaper than enriching all 1500 matches when the user is only going to see 50. In-memory sort paths (SENDER/RECEIVER/RELEVANCE) keep their LEFT JOIN-friendly sort but now slice in-memory too, so enrichment still runs against the page slice only. Controller passes PageRequest.of(page, size) built from @RequestParam values. Plan-level "add @Validated" prerequisite comes in the next commit. All existing tests updated mechanically to pass a pageable argument (PageRequest.of(0, 10_000) as an "effectively unpaged" sentinel). Stubs that previously matched findAll(Specification, Sort) for the fast path now match findAll(Specification, Pageable) with PageImpl<>. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-24 08:23:15 +02:00
parent 1299bd5938
commit 7a75ffed76
5 changed files with 74 additions and 46 deletions
--- a/backend/src/main/java/org/raddatz/familienarchiv/controller/DocumentController.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/controller/DocumentController.java
@@ -13,6 +13,11 @@ import java.util.UUID;

 import io.swagger.v3.oas.annotations.Parameter;
 import io.swagger.v3.oas.annotations.responses.ApiResponse;
+import jakarta.validation.constraints.Max;
+import jakarta.validation.constraints.Min;
+import org.springframework.data.domain.PageRequest;
+import org.springframework.data.domain.Pageable;
+import org.springframework.validation.annotation.Validated;
 import org.raddatz.familienarchiv.dto.DocumentSearchResult;
 import org.raddatz.familienarchiv.dto.DocumentUpdateDTO;
 import org.raddatz.familienarchiv.dto.TagOperator;
@@ -62,6 +67,7 @@ import lombok.extern.slf4j.Slf4j;
@RequestMapping("/api/documents")
@RequiredArgsConstructor
@Slf4j
+@Validated
 public class DocumentController {

    private final DocumentService documentService;
@@ -252,14 +258,20 @@ public class DocumentController {
            @Parameter(description = "Filter by document status") @RequestParam(required = false) DocumentStatus status,
            @Parameter(description = "Sort field") @RequestParam(required = false) DocumentSort sort,
            @Parameter(description = "Sort direction: ASC or DESC") @RequestParam(required = false, defaultValue = "DESC") String dir,
-            @Parameter(description = "Tag operator: AND (default) or OR") @RequestParam(required = false) String tagOp) {
+            @Parameter(description = "Tag operator: AND (default) or OR") @RequestParam(required = false) String tagOp,
+            // @Max on page keeps the offset (page * size) far below Integer.MAX_VALUE.
+            // Pageable.getOffset() returns a long, but downstream consumers take an int offset
+            // (e.g. JPA's Query.setFirstResult(int)), so an oversized value cannot be represented.
+            @Parameter(description = "Page number (0-indexed)") @RequestParam(defaultValue = "0") @Min(0) @Max(100_000) int page,
+            @Parameter(description = "Page size (max 100)") @RequestParam(defaultValue = "50") @Min(1) @Max(100) int size) {
        if (!"ASC".equalsIgnoreCase(dir) && !"DESC".equalsIgnoreCase(dir)) {
            throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "dir must be ASC or DESC");
        }
        // tagOp is a raw String at the HTTP boundary; any value other than "OR" (case-insensitive)
        // defaults to AND, which matches the frontend default and keeps old clients working.
        TagOperator operator = "OR".equalsIgnoreCase(tagOp) ? TagOperator.OR : TagOperator.AND;
-        return ResponseEntity.ok(documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir, operator));
+        Pageable pageable = PageRequest.of(page, size);
+        return ResponseEntity.ok(documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir, operator, pageable));
    }

    // --- TRAINING LABELS ---
--- a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java
@@ -22,7 +22,9 @@ import org.raddatz.familienarchiv.model.TrainingLabel;
 import org.raddatz.familienarchiv.model.Person;
 import org.raddatz.familienarchiv.model.Tag;
 import org.raddatz.familienarchiv.repository.DocumentRepository;
+import org.springframework.data.domain.Page;
 import org.springframework.data.domain.PageRequest;
+import org.springframework.data.domain.Pageable;
 import org.springframework.data.domain.Sort;
 import org.springframework.data.jpa.domain.Specification;
 import org.raddatz.familienarchiv.exception.DomainException;
@@ -355,7 +357,7 @@ public class DocumentService {
    }

    // 1. Allgemeine Suche (für das Suchfeld im Frontend)
-    public DocumentSearchResult searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir, TagOperator tagOperator) {
+    public DocumentSearchResult searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir, TagOperator tagOperator, Pageable pageable) {
        boolean hasText = StringUtils.hasText(text);
        List<UUID> rankedIds = null;

@@ -376,15 +378,18 @@ public class DocumentService {
                .and(hasTagPartial(tagQ))
                .and(hasStatus(status));

-        // SENDER and RECEIVER are sorted in-memory because JPA's Sort.by("sender.lastName")
-        // generates an INNER JOIN that silently drops documents with null sender/receivers.
+        // SENDER, RECEIVER and RELEVANCE sorts load the full match set and slice in memory.
+        // JPA's Sort.by("sender.lastName") generates an INNER JOIN that silently drops
+        // documents with null sender/receivers; RELEVANCE maps a DB order to an external
+        // rank list. Cost scales linearly with match count — acceptable while documents
+        // stays under ~10k rows. Past that, replace with SQL-level LEFT JOIN sort.
        if (sort == DocumentSort.RECEIVER) {
-            List<Document> results = documentRepository.findAll(spec);
-            return buildResult(sortByFirstReceiver(results, dir), text);
+            List<Document> sorted = sortByFirstReceiver(documentRepository.findAll(spec), dir);
+            return buildResultPaged(pageSlice(sorted, pageable), text, pageable, sorted.size());
        }
        if (sort == DocumentSort.SENDER) {
-            List<Document> results = documentRepository.findAll(spec);
-            return buildResult(sortBySender(results, dir), text);
+            List<Document> sorted = sortBySender(documentRepository.findAll(spec), dir);
+            return buildResultPaged(pageSlice(sorted, pageable), text, pageable, sorted.size());
        }

        // RELEVANCE: default when text present and no explicit sort given
@@ -397,15 +402,26 @@ public class DocumentService {
                    .sorted(Comparator.comparingInt(
                            doc -> rankMap.getOrDefault(doc.getId(), Integer.MAX_VALUE)))
                    .toList();
-            return buildResult(sorted, text);
+            return buildResultPaged(pageSlice(sorted, pageable), text, pageable, sorted.size());
        }

-        Sort springSort = resolveSort(sort, dir);
-        List<Document> results = documentRepository.findAll(spec, springSort);
-        return buildResult(results, text);
+        // Fast path — push sort + paging into the DB and enrich only the returned slice.
+        PageRequest pageRequest = PageRequest.of(pageable.getPageNumber(), pageable.getPageSize(), resolveSort(sort, dir));
+        Page<Document> page = documentRepository.findAll(spec, pageRequest);
+        return buildResultPaged(page.getContent(), text, pageable, page.getTotalElements());
    }

-    private DocumentSearchResult buildResult(List<Document> documents, String text) {
+    private static <T> List<T> pageSlice(List<T> sorted, Pageable pageable) { // returns the sub-list view of `sorted` for the requested page; empty when the page starts past the end
+        int from = (int) Math.min(pageable.getOffset(), (long) sorted.size()); // clamp while still a long: casting first would wrap offsets > Integer.MAX_VALUE to a negative index
+        int to = (int) Math.min((long) from + pageable.getPageSize(), (long) sorted.size()); // add in long so from + pageSize cannot overflow int
+        return sorted.subList(from, to);
+    }
+
+    private DocumentSearchResult buildResultPaged(List<Document> slice, String text, Pageable pageable, long totalElements) {
+        return DocumentSearchResult.paged(enrichItems(slice, text), pageable, totalElements); // enriches only the given slice; totalElements is passed through as supplied by the caller
+    }
+
+    private List<DocumentSearchItem> enrichItems(List<Document> documents, String text) {
        List<Document> colorResolved = resolveDocumentTagColors(documents);
        Map<UUID, SearchMatchData> matchData = enrichWithMatchData(colorResolved, text);

@@ -413,14 +429,12 @@ public class DocumentService {
        Map<UUID, Integer> completionByDoc = fetchCompletionPercentages(docIds);
        Map<UUID, List<ActivityActorDTO>> contributorsByDoc = auditLogQueryService.findRecentContributorsPerDocument(docIds);

-        List<DocumentSearchItem> items = colorResolved.stream().map(doc -> new DocumentSearchItem(
+        return colorResolved.stream().map(doc -> new DocumentSearchItem(
                doc,
                matchData.getOrDefault(doc.getId(), SearchMatchData.empty()),
                completionByDoc.getOrDefault(doc.getId(), 0),
                contributorsByDoc.getOrDefault(doc.getId(), List.of())
        )).toList();
-
-        return DocumentSearchResult.of(items);
    }

    private Map<UUID, Integer> fetchCompletionPercentages(List<UUID> docIds) {