feat(search): DocumentService.searchDocuments takes Pageable and slices

Fast path (DATE/TITLE/UPLOAD_DATE) pushes sort + paging into the DB via
findAll(Specification, PageRequest) and enriches only the returned slice
— 30× cheaper than enriching all 1500 matches when the user is only
going to see 50. In-memory sort paths (SENDER/RECEIVER/RELEVANCE) keep
their LEFT JOIN-friendly sort but now slice in-memory too, so enrichment
still runs against the page slice only.

Controller passes PageRequest.of(page, size) built from @RequestParam
values. Plan-level "add @Validated" prerequisite comes in the next commit.

All existing tests updated mechanically to pass a pageable argument
(PageRequest.of(0, 10_000) as an "effectively unpaged" sentinel). Stubs
that previously matched findAll(Specification, Sort) for the fast path
now match findAll(Specification, Pageable) with PageImpl<>.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-24 08:23:15 +02:00
committed by marcel
parent 1299bd5938
commit 7a75ffed76
5 changed files with 74 additions and 46 deletions

View File

@@ -13,6 +13,11 @@ import java.util.UUID;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import jakarta.validation.constraints.Max;
import jakarta.validation.constraints.Min;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.validation.annotation.Validated;
import org.raddatz.familienarchiv.dto.DocumentSearchResult;
import org.raddatz.familienarchiv.dto.DocumentUpdateDTO;
import org.raddatz.familienarchiv.dto.TagOperator;
@@ -62,6 +67,7 @@ import lombok.extern.slf4j.Slf4j;
@RequestMapping("/api/documents")
@RequiredArgsConstructor
@Slf4j
@Validated
public class DocumentController {
private final DocumentService documentService;
@@ -252,14 +258,20 @@ public class DocumentController {
@Parameter(description = "Filter by document status") @RequestParam(required = false) DocumentStatus status,
@Parameter(description = "Sort field") @RequestParam(required = false) DocumentSort sort,
@Parameter(description = "Sort direction: ASC or DESC") @RequestParam(required = false, defaultValue = "DESC") String dir,
@Parameter(description = "Tag operator: AND (default) or OR") @RequestParam(required = false) String tagOp) {
@Parameter(description = "Tag operator: AND (default) or OR") @RequestParam(required = false) String tagOp,
// @Max on page guards against overflow when pageable.getOffset() is computed
// as page * size — Integer.MAX_VALUE * 50 would wrap to a negative long, which
// Hibernate cheerfully turns into an invalid SQL OFFSET.
@Parameter(description = "Page number (0-indexed)") @RequestParam(defaultValue = "0") @Min(0) @Max(100_000) int page,
@Parameter(description = "Page size (max 100)") @RequestParam(defaultValue = "50") @Min(1) @Max(100) int size) {
if (!"ASC".equalsIgnoreCase(dir) && !"DESC".equalsIgnoreCase(dir)) {
throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "dir must be ASC or DESC");
}
// tagOp is a raw String at the HTTP boundary; any value other than "OR" (case-insensitive)
// defaults to AND, which matches the frontend default and keeps old clients working.
TagOperator operator = "OR".equalsIgnoreCase(tagOp) ? TagOperator.OR : TagOperator.AND;
return ResponseEntity.ok(documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir, operator));
Pageable pageable = PageRequest.of(page, size);
return ResponseEntity.ok(documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir, operator, pageable));
}
// --- TRAINING LABELS ---

View File

@@ -22,7 +22,9 @@ import org.raddatz.familienarchiv.model.TrainingLabel;
import org.raddatz.familienarchiv.model.Person;
import org.raddatz.familienarchiv.model.Tag;
import org.raddatz.familienarchiv.repository.DocumentRepository;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.data.domain.Sort;
import org.springframework.data.jpa.domain.Specification;
import org.raddatz.familienarchiv.exception.DomainException;
@@ -355,7 +357,7 @@ public class DocumentService {
}
// 1. Allgemeine Suche (für das Suchfeld im Frontend)
public DocumentSearchResult searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir, TagOperator tagOperator) {
public DocumentSearchResult searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir, TagOperator tagOperator, Pageable pageable) {
boolean hasText = StringUtils.hasText(text);
List<UUID> rankedIds = null;
@@ -376,15 +378,18 @@ public class DocumentService {
.and(hasTagPartial(tagQ))
.and(hasStatus(status));
// SENDER and RECEIVER are sorted in-memory because JPA's Sort.by("sender.lastName")
// generates an INNER JOIN that silently drops documents with null sender/receivers.
// SENDER, RECEIVER and RELEVANCE sorts load the full match set and slice in memory.
// JPA's Sort.by("sender.lastName") generates an INNER JOIN that silently drops
// documents with null sender/receivers; RELEVANCE maps a DB order to an external
// rank list. Cost scales linearly with match count — acceptable while documents
// stays under ~10k rows. Past that, replace with SQL-level LEFT JOIN sort.
if (sort == DocumentSort.RECEIVER) {
List<Document> results = documentRepository.findAll(spec);
return buildResult(sortByFirstReceiver(results, dir), text);
List<Document> sorted = sortByFirstReceiver(documentRepository.findAll(spec), dir);
return buildResultPaged(pageSlice(sorted, pageable), text, pageable, sorted.size());
}
if (sort == DocumentSort.SENDER) {
List<Document> results = documentRepository.findAll(spec);
return buildResult(sortBySender(results, dir), text);
List<Document> sorted = sortBySender(documentRepository.findAll(spec), dir);
return buildResultPaged(pageSlice(sorted, pageable), text, pageable, sorted.size());
}
// RELEVANCE: default when text present and no explicit sort given
@@ -397,15 +402,26 @@ public class DocumentService {
.sorted(Comparator.comparingInt(
doc -> rankMap.getOrDefault(doc.getId(), Integer.MAX_VALUE)))
.toList();
return buildResult(sorted, text);
return buildResultPaged(pageSlice(sorted, pageable), text, pageable, sorted.size());
}
Sort springSort = resolveSort(sort, dir);
List<Document> results = documentRepository.findAll(spec, springSort);
return buildResult(results, text);
// Fast path — push sort + paging into the DB and enrich only the returned slice.
PageRequest pageRequest = PageRequest.of(pageable.getPageNumber(), pageable.getPageSize(), resolveSort(sort, dir));
Page<Document> page = documentRepository.findAll(spec, pageRequest);
return buildResultPaged(page.getContent(), text, pageable, page.getTotalElements());
}
private DocumentSearchResult buildResult(List<Document> documents, String text) {
private static <T> List<T> pageSlice(List<T> sorted, Pageable pageable) {
int from = Math.min((int) pageable.getOffset(), sorted.size());
int to = Math.min(from + pageable.getPageSize(), sorted.size());
return sorted.subList(from, to);
}
private DocumentSearchResult buildResultPaged(List<Document> slice, String text, Pageable pageable, long totalElements) {
return DocumentSearchResult.paged(enrichItems(slice, text), pageable, totalElements);
}
private List<DocumentSearchItem> enrichItems(List<Document> documents, String text) {
List<Document> colorResolved = resolveDocumentTagColors(documents);
Map<UUID, SearchMatchData> matchData = enrichWithMatchData(colorResolved, text);
@@ -413,14 +429,12 @@ public class DocumentService {
Map<UUID, Integer> completionByDoc = fetchCompletionPercentages(docIds);
Map<UUID, List<ActivityActorDTO>> contributorsByDoc = auditLogQueryService.findRecentContributorsPerDocument(docIds);
List<DocumentSearchItem> items = colorResolved.stream().map(doc -> new DocumentSearchItem(
return colorResolved.stream().map(doc -> new DocumentSearchItem(
doc,
matchData.getOrDefault(doc.getId(), SearchMatchData.empty()),
completionByDoc.getOrDefault(doc.getId(), 0),
contributorsByDoc.getOrDefault(doc.getId(), List.of())
)).toList();
return DocumentSearchResult.of(items);
}
private Map<UUID, Integer> fetchCompletionPercentages(List<UUID> docIds) {