feat(search): DocumentService.searchDocuments takes Pageable and slices

Fast path (DATE/TITLE/UPLOAD_DATE) pushes sort + paging into the DB via
findAll(Specification, PageRequest) and enriches only the returned slice
— 30× cheaper than enriching all 1500 matches when the user is only
going to see 50. In-memory sort paths (SENDER/RECEIVER/RELEVANCE) keep
their LEFT JOIN-friendly sort but now slice in-memory too, so enrichment
still runs against the page slice only.

Controller passes PageRequest.of(page, size) built from @RequestParam
values. Plan-level "add @Validated" prerequisite comes in the next commit.

All existing tests updated mechanically to pass a pageable argument
(PageRequest.of(0, 10_000) as an "effectively unpaged" sentinel). Stubs
that previously matched findAll(Specification, Sort) for the fast path
now match findAll(Specification, Pageable) with PageImpl<>.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-24 08:23:15 +02:00
parent 1a70a3db6d
commit 782f845dc9
5 changed files with 74 additions and 46 deletions

View File

@@ -13,6 +13,11 @@ import java.util.UUID;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import jakarta.validation.constraints.Max;
import jakarta.validation.constraints.Min;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.validation.annotation.Validated;
import org.raddatz.familienarchiv.dto.DocumentSearchResult;
import org.raddatz.familienarchiv.dto.DocumentUpdateDTO;
import org.raddatz.familienarchiv.dto.TagOperator;
@@ -62,6 +67,7 @@ import lombok.extern.slf4j.Slf4j;
@RequestMapping("/api/documents")
@RequiredArgsConstructor
@Slf4j
@Validated
public class DocumentController {
private final DocumentService documentService;
@@ -252,14 +258,20 @@ public class DocumentController {
@Parameter(description = "Filter by document status") @RequestParam(required = false) DocumentStatus status,
@Parameter(description = "Sort field") @RequestParam(required = false) DocumentSort sort,
@Parameter(description = "Sort direction: ASC or DESC") @RequestParam(required = false, defaultValue = "DESC") String dir,
@Parameter(description = "Tag operator: AND (default) or OR") @RequestParam(required = false) String tagOp) {
@Parameter(description = "Tag operator: AND (default) or OR") @RequestParam(required = false) String tagOp,
// @Max on page guards against overflow when pageable.getOffset() is computed
// as page * size — Integer.MAX_VALUE * 50 would wrap to a negative long, which
// Hibernate cheerfully turns into an invalid SQL OFFSET.
@Parameter(description = "Page number (0-indexed)") @RequestParam(defaultValue = "0") @Min(0) @Max(100_000) int page,
@Parameter(description = "Page size (max 100)") @RequestParam(defaultValue = "50") @Min(1) @Max(100) int size) {
if (!"ASC".equalsIgnoreCase(dir) && !"DESC".equalsIgnoreCase(dir)) {
throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "dir must be ASC or DESC");
}
// tagOp is a raw String at the HTTP boundary; any value other than "OR" (case-insensitive)
// defaults to AND, which matches the frontend default and keeps old clients working.
TagOperator operator = "OR".equalsIgnoreCase(tagOp) ? TagOperator.OR : TagOperator.AND;
return ResponseEntity.ok(documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir, operator));
Pageable pageable = PageRequest.of(page, size);
return ResponseEntity.ok(documentService.searchDocuments(q, from, to, senderId, receiverId, tags, tagQ, status, sort, dir, operator, pageable));
}
// --- TRAINING LABELS ---

View File

@@ -22,7 +22,9 @@ import org.raddatz.familienarchiv.model.TrainingLabel;
import org.raddatz.familienarchiv.model.Person;
import org.raddatz.familienarchiv.model.Tag;
import org.raddatz.familienarchiv.repository.DocumentRepository;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.data.domain.Sort;
import org.springframework.data.jpa.domain.Specification;
import org.raddatz.familienarchiv.exception.DomainException;
@@ -355,7 +357,7 @@ public class DocumentService {
}
// 1. Allgemeine Suche (für das Suchfeld im Frontend)
public DocumentSearchResult searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir, TagOperator tagOperator) {
public DocumentSearchResult searchDocuments(String text, LocalDate from, LocalDate to, UUID sender, UUID receiver, List<String> tags, String tagQ, DocumentStatus status, DocumentSort sort, String dir, TagOperator tagOperator, Pageable pageable) {
boolean hasText = StringUtils.hasText(text);
List<UUID> rankedIds = null;
@@ -376,15 +378,18 @@ public class DocumentService {
.and(hasTagPartial(tagQ))
.and(hasStatus(status));
// SENDER and RECEIVER are sorted in-memory because JPA's Sort.by("sender.lastName")
// generates an INNER JOIN that silently drops documents with null sender/receivers.
// SENDER, RECEIVER and RELEVANCE sorts load the full match set and slice in memory.
// JPA's Sort.by("sender.lastName") generates an INNER JOIN that silently drops
// documents with null sender/receivers; RELEVANCE maps a DB order to an external
// rank list. Cost scales linearly with match count — acceptable while documents
// stays under ~10k rows. Past that, replace with SQL-level LEFT JOIN sort.
if (sort == DocumentSort.RECEIVER) {
List<Document> results = documentRepository.findAll(spec);
return buildResult(sortByFirstReceiver(results, dir), text);
List<Document> sorted = sortByFirstReceiver(documentRepository.findAll(spec), dir);
return buildResultPaged(pageSlice(sorted, pageable), text, pageable, sorted.size());
}
if (sort == DocumentSort.SENDER) {
List<Document> results = documentRepository.findAll(spec);
return buildResult(sortBySender(results, dir), text);
List<Document> sorted = sortBySender(documentRepository.findAll(spec), dir);
return buildResultPaged(pageSlice(sorted, pageable), text, pageable, sorted.size());
}
// RELEVANCE: default when text present and no explicit sort given
@@ -397,15 +402,26 @@ public class DocumentService {
.sorted(Comparator.comparingInt(
doc -> rankMap.getOrDefault(doc.getId(), Integer.MAX_VALUE)))
.toList();
return buildResult(sorted, text);
return buildResultPaged(pageSlice(sorted, pageable), text, pageable, sorted.size());
}
Sort springSort = resolveSort(sort, dir);
List<Document> results = documentRepository.findAll(spec, springSort);
return buildResult(results, text);
// Fast path — push sort + paging into the DB and enrich only the returned slice.
PageRequest pageRequest = PageRequest.of(pageable.getPageNumber(), pageable.getPageSize(), resolveSort(sort, dir));
Page<Document> page = documentRepository.findAll(spec, pageRequest);
return buildResultPaged(page.getContent(), text, pageable, page.getTotalElements());
}
private DocumentSearchResult buildResult(List<Document> documents, String text) {
private static <T> List<T> pageSlice(List<T> sorted, Pageable pageable) {
int from = Math.min((int) pageable.getOffset(), sorted.size());
int to = Math.min(from + pageable.getPageSize(), sorted.size());
return sorted.subList(from, to);
}
private DocumentSearchResult buildResultPaged(List<Document> slice, String text, Pageable pageable, long totalElements) {
return DocumentSearchResult.paged(enrichItems(slice, text), pageable, totalElements);
}
private List<DocumentSearchItem> enrichItems(List<Document> documents, String text) {
List<Document> colorResolved = resolveDocumentTagColors(documents);
Map<UUID, SearchMatchData> matchData = enrichWithMatchData(colorResolved, text);
@@ -413,14 +429,12 @@ public class DocumentService {
Map<UUID, Integer> completionByDoc = fetchCompletionPercentages(docIds);
Map<UUID, List<ActivityActorDTO>> contributorsByDoc = auditLogQueryService.findRecentContributorsPerDocument(docIds);
List<DocumentSearchItem> items = colorResolved.stream().map(doc -> new DocumentSearchItem(
return colorResolved.stream().map(doc -> new DocumentSearchItem(
doc,
matchData.getOrDefault(doc.getId(), SearchMatchData.empty()),
completionByDoc.getOrDefault(doc.getId(), 0),
contributorsByDoc.getOrDefault(doc.getId(), List.of())
)).toList();
return DocumentSearchResult.of(items);
}
private Map<UUID, Integer> fetchCompletionPercentages(List<UUID> docIds) {

View File

@@ -69,7 +69,7 @@ class DocumentControllerTest {
@Test
@WithMockUser
void search_returns200_whenAuthenticated() throws Exception {
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
.thenReturn(DocumentSearchResult.of(List.of()));
mockMvc.perform(get("/api/documents/search"))
@@ -79,13 +79,13 @@ class DocumentControllerTest {
@Test
@WithMockUser
void search_withStatusParam_passesItToService() throws Exception {
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), eq(DocumentStatus.REVIEWED), any(), any(), any()))
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), eq(DocumentStatus.REVIEWED), any(), any(), any(), any()))
.thenReturn(DocumentSearchResult.of(List.of()));
mockMvc.perform(get("/api/documents/search").param("status", "REVIEWED"))
.andExpect(status().isOk());
verify(documentService).searchDocuments(any(), any(), any(), any(), any(), any(), any(), eq(DocumentStatus.REVIEWED), any(), any(), any());
verify(documentService).searchDocuments(any(), any(), any(), any(), any(), any(), any(), eq(DocumentStatus.REVIEWED), any(), any(), any(), any());
}
@Test
@@ -112,12 +112,12 @@ class DocumentControllerTest {
@Test
@WithMockUser
void search_responseContainsTotalCount() throws Exception {
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
.thenReturn(DocumentSearchResult.of(List.of()));
mockMvc.perform(get("/api/documents/search"))
.andExpect(status().isOk())
.andExpect(jsonPath("$.total").value(0))
.andExpect(jsonPath("$.totalElements").value(0))
.andExpect(jsonPath("$.items").isArray());
}
@@ -133,7 +133,7 @@ class DocumentControllerTest {
.build();
var matchData = new SearchMatchData(
"Er schrieb einen langen Brief", List.of(), false, List.of(), List.of(), List.of(), null, List.of());
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
.thenReturn(DocumentSearchResult.of(List.of(new DocumentSearchItem(doc, matchData, 0, List.of()))));
mockMvc.perform(get("/api/documents/search").param("q", "Brief"))

View File

@@ -11,7 +11,8 @@ import org.raddatz.familienarchiv.dto.DocumentSort;
import org.raddatz.familienarchiv.model.Document;
import org.raddatz.familienarchiv.model.DocumentStatus;
import org.raddatz.familienarchiv.repository.DocumentRepository;
import org.springframework.data.domain.Sort;
import org.springframework.data.domain.PageImpl;
import org.springframework.data.domain.Pageable;
import org.springframework.data.jpa.domain.Specification;
import java.time.LocalDate;
@@ -51,12 +52,12 @@ class DocumentServiceSortTest {
// FTS returns id1 first (higher rank), id2 second
when(documentRepository.findRankedIdsByFts("Brief")).thenReturn(List.of(id1, id2));
// findAll(spec, sort) — the correct date path — returns date-DESC order
when(documentRepository.findAll(any(Specification.class), any(Sort.class)))
.thenReturn(List.of(newer, older));
// findAll(spec, pageable) — the correct date path — returns date-DESC order
when(documentRepository.findAll(any(Specification.class), any(Pageable.class)))
.thenReturn(new PageImpl<>(List.of(newer, older)));
DocumentSearchResult result = documentService.searchDocuments(
"Brief", null, null, null, null, null, null, null, DocumentSort.DATE, "DESC", null);
"Brief", null, null, null, null, null, null, null, DocumentSort.DATE, "DESC", null, org.springframework.data.domain.PageRequest.of(0, 10_000));
// Expect: date order (newer 1960 first), NOT rank order (older 1940 first)
assertThat(result.items()).hasSize(2);
@@ -78,7 +79,7 @@ class DocumentServiceSortTest {
.thenReturn(List.of(doc2, doc1)); // unordered from DB
DocumentSearchResult result = documentService.searchDocuments(
"Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null);
"Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, org.springframework.data.domain.PageRequest.of(0, 10_000));
// Expect: rank order restored (id1 first)
assertThat(result.items().get(0).document().getId()).isEqualTo(id1);
@@ -97,7 +98,7 @@ class DocumentServiceSortTest {
.thenReturn(List.of(doc2, doc1));
DocumentSearchResult result = documentService.searchDocuments(
"Brief", null, null, null, null, null, null, null, null, null, null);
"Brief", null, null, null, null, null, null, null, null, null, null, org.springframework.data.domain.PageRequest.of(0, 10_000));
assertThat(result.items().get(0).document().getId()).isEqualTo(id1);
}

View File

@@ -1327,22 +1327,22 @@ class DocumentServiceTest {
@Test
void searchDocuments_passesStatusSpecificationToRepository() {
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Sort.class)))
.thenReturn(List.of());
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class)))
.thenReturn(new PageImpl<>(List.of()));
documentService.searchDocuments(null, null, null, null, null, null, null, DocumentStatus.REVIEWED, null, null, null);
documentService.searchDocuments(null, null, null, null, null, null, null, DocumentStatus.REVIEWED, null, null, null, org.springframework.data.domain.PageRequest.of(0, 10_000));
verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Sort.class));
verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class));
}
@Test
void searchDocuments_withNullStatus_doesNotFilterByStatus() {
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Sort.class)))
.thenReturn(List.of());
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class)))
.thenReturn(new PageImpl<>(List.of()));
documentService.searchDocuments(null, null, null, null, null, null, null, null, null, null, null);
documentService.searchDocuments(null, null, null, null, null, null, null, null, null, null, null, org.springframework.data.domain.PageRequest.of(0, 10_000));
verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Sort.class));
verify(documentRepository).findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class));
}
// ─── getRecentActivity ────────────────────────────────────────────────────
@@ -1418,7 +1418,7 @@ class DocumentServiceTest {
.thenReturn(List.of(withSender, noSender));
DocumentSearchResult result = documentService.searchDocuments(
null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null);
null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null, org.springframework.data.domain.PageRequest.of(0, 10_000));
assertThat(result.items()).hasSize(2);
assertThat(result.items()).extracting(item -> item.document().getTitle()).containsExactly("Has Sender", "No Sender");
@@ -1438,7 +1438,7 @@ class DocumentServiceTest {
.thenReturn(List.of(noReceivers, withReceiver));
DocumentSearchResult result = documentService.searchDocuments(
null, null, null, null, null, null, null, null, DocumentSort.RECEIVER, "asc", null);
null, null, null, null, null, null, null, null, DocumentSort.RECEIVER, "asc", null, org.springframework.data.domain.PageRequest.of(0, 10_000));
assertThat(result.items()).extracting(item -> item.document().getTitle())
.containsExactly("Has Receiver", "No Receivers");
@@ -1460,7 +1460,7 @@ class DocumentServiceTest {
.thenReturn(List.of(docNullName, docSmith));
DocumentSearchResult result = documentService.searchDocuments(
null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null);
null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null, org.springframework.data.domain.PageRequest.of(0, 10_000));
// null lastName should sort to end (treated as empty), not before "smith" (as "null")
assertThat(result.items()).extracting(item -> item.document().getTitle())
@@ -1482,7 +1482,7 @@ class DocumentServiceTest {
when(documentRepository.findEnrichmentData(any(), eq("Brief"))).thenReturn(rows);
DocumentSearchResult result = documentService.searchDocuments(
"Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null);
"Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, org.springframework.data.domain.PageRequest.of(0, 10_000));
assertThat(result.items()).hasSize(1);
SearchMatchData md = result.items().get(0).matchData();
@@ -1492,11 +1492,12 @@ class DocumentServiceTest {
@Test
void searchDocuments_withoutTextQuery_returnsEmptyMatchData() {
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Sort.class)))
.thenReturn(List.of());
when(documentRepository.findAll(any(org.springframework.data.jpa.domain.Specification.class), any(Pageable.class)))
.thenReturn(new PageImpl<>(List.of()));
DocumentSearchResult result = documentService.searchDocuments(
null, null, null, null, null, null, null, null, null, null, null);
null, null, null, null, null, null, null, null, null, null, null,
org.springframework.data.domain.PageRequest.of(0, 10_000));
assertThat(result.items()).isEmpty();
}
@@ -1515,7 +1516,7 @@ class DocumentServiceTest {
when(documentRepository.findEnrichmentData(any(), eq("Brief"))).thenReturn(rows);
DocumentSearchResult result = documentService.searchDocuments(
"Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null);
"Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, org.springframework.data.domain.PageRequest.of(0, 10_000));
SearchMatchData md = result.items().get(0).matchData();
assertThat(md.transcriptionSnippet()).isEqualTo("Hier ist der Brief aus Berlin");