From eb7edd1382d06ea1e19bf92526db1c4796f9d4d1 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 22 May 2026 17:43:48 +0200 Subject: [PATCH] refactor(document): replace DocumentSearchItem with flat DocumentListItem DTO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eliminates excessive data exposure (OWASP API3:2023) — transcription, filePath, fileHash, thumbnailKey, scriptType and other detail-only fields are no longer serialised in the list API response. Co-Authored-By: Claude Sonnet 4.6 --- .../document/DocumentListItem.java | 36 +++++++++++++++++++ .../document/DocumentSearchItem.java | 18 ---------- .../document/DocumentSearchResult.java | 14 ++------ .../document/DocumentService.java | 25 +++++++++++-- .../document/DocumentControllerTest.java | 33 ++++++++++++----- .../document/DocumentLazyLoadingTest.java | 2 +- .../DocumentSearchPagedIntegrationTest.java | 4 +-- .../document/DocumentSearchResultTest.java | 24 ++++++------- .../document/DocumentServiceSortTest.java | 8 ++--- .../document/DocumentServiceTest.java | 10 +++--- 10 files changed, 108 insertions(+), 66 deletions(-) create mode 100644 backend/src/main/java/org/raddatz/familienarchiv/document/DocumentListItem.java delete mode 100644 backend/src/main/java/org/raddatz/familienarchiv/document/DocumentSearchItem.java diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentListItem.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentListItem.java new file mode 100644 index 00000000..7cbc6496 --- /dev/null +++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentListItem.java @@ -0,0 +1,36 @@ +package org.raddatz.familienarchiv.document; + +import io.swagger.v3.oas.annotations.media.Schema; +import org.raddatz.familienarchiv.audit.ActivityActorDTO; +import org.raddatz.familienarchiv.person.Person; +import org.raddatz.familienarchiv.tag.Tag; + +import java.time.LocalDate; +import java.util.List; +import java.util.UUID; + +public record DocumentListItem( + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + UUID id, + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + String title, + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + String originalFilename, + String thumbnailUrl, + LocalDate documentDate, + Person sender, + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + List receivers, + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + List tags, + String archiveBox, + String archiveFolder, + String location, + String summary, + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + int completionPercentage, + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + List contributors, + @Schema(requiredMode = Schema.RequiredMode.REQUIRED) + SearchMatchData matchData +) {} diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentSearchItem.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentSearchItem.java deleted file mode 100644 index 58d31a03..00000000 --- a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentSearchItem.java +++ /dev/null @@ -1,18 +0,0 @@ -package org.raddatz.familienarchiv.document; - -import io.swagger.v3.oas.annotations.media.Schema; -import org.raddatz.familienarchiv.audit.ActivityActorDTO; -import org.raddatz.familienarchiv.document.Document; - -import java.util.List; - -public record DocumentSearchItem( - @Schema(requiredMode = Schema.RequiredMode.REQUIRED) - Document document, - @Schema(requiredMode = Schema.RequiredMode.REQUIRED) - SearchMatchData matchData, - @Schema(requiredMode = Schema.RequiredMode.REQUIRED) - int completionPercentage, - @Schema(requiredMode = Schema.RequiredMode.REQUIRED) - List contributors -) {} diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentSearchResult.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentSearchResult.java index e80efbd2..25b39184 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentSearchResult.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentSearchResult.java @@ -7,7 +7,7 @@ import java.util.List; public record DocumentSearchResult( @Schema(requiredMode = Schema.RequiredMode.REQUIRED) - List items, + List items, @Schema(requiredMode = Schema.RequiredMode.REQUIRED) long totalElements, @Schema(requiredMode = Schema.RequiredMode.REQUIRED) @@ -17,20 +17,12 @@ public record DocumentSearchResult( @Schema(requiredMode = Schema.RequiredMode.REQUIRED) int totalPages ) { - /** - * Single-page convenience factory used by empty-result shortcuts and by tests that - * don't care about paging. Treats the whole list as page 0 of itself. - */ - public static DocumentSearchResult of(List items) { + public static DocumentSearchResult of(List items) { int size = items.size(); return new DocumentSearchResult(items, size, 0, size, size == 0 ? 0 : 1); } - /** - * Paged factory used by the service when it has a real Pageable + full match count - * (e.g. from Spring's Page or from an in-memory sort-then-slice). - */ - public static DocumentSearchResult paged(List slice, Pageable pageable, long totalElements) { + public static DocumentSearchResult paged(List slice, Pageable pageable, long totalElements) { int pageSize = pageable.getPageSize(); int totalPages = pageSize == 0 ? 0 : (int) ((totalElements + pageSize - 1) / pageSize); return new DocumentSearchResult(slice, totalElements, pageable.getPageNumber(), pageSize, totalPages); diff --git a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java index a739764e..0a290c5b 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/document/DocumentService.java @@ -10,7 +10,6 @@ import org.raddatz.familienarchiv.audit.AuditService; import org.raddatz.familienarchiv.document.DocumentBatchMetadataDTO; import org.raddatz.familienarchiv.document.DocumentBatchSummary; import org.raddatz.familienarchiv.document.DocumentBulkEditDTO; -import org.raddatz.familienarchiv.document.DocumentSearchItem; import org.raddatz.familienarchiv.document.DocumentSearchResult; import org.raddatz.familienarchiv.document.DocumentSort; import org.raddatz.familienarchiv.document.DocumentUpdateDTO; @@ -736,7 +735,7 @@ public class DocumentService { return DocumentSearchResult.paged(enrichItems(slice, text), pageable, totalElements); } - private List enrichItems(List documents, String text) { + private List enrichItems(List documents, String text) { List colorResolved = resolveDocumentTagColors(documents); Map matchData = enrichWithMatchData(colorResolved, text); @@ -744,7 +743,7 @@ public class DocumentService { Map completionByDoc = fetchCompletionPercentages(docIds); Map> contributorsByDoc = auditLogQueryService.findRecentContributorsPerDocument(docIds); - return colorResolved.stream().map(doc -> new DocumentSearchItem( + return colorResolved.stream().map(doc -> toListItem( doc, matchData.getOrDefault(doc.getId(), SearchMatchData.empty()), completionByDoc.getOrDefault(doc.getId(), 0), @@ -752,6 +751,26 @@ public class DocumentService { )).toList(); } + private DocumentListItem toListItem(Document doc, SearchMatchData match, int completionPct, List contributors) { + return new DocumentListItem( + doc.getId(), + doc.getTitle(), + doc.getOriginalFilename(), + doc.getThumbnailUrl(), + doc.getDocumentDate(), + doc.getSender(), + doc.getReceivers() != null ? List.copyOf(doc.getReceivers()) : List.of(), + doc.getTags() != null ? List.copyOf(doc.getTags()) : List.of(), + doc.getArchiveBox(), + doc.getArchiveFolder(), + doc.getLocation(), + doc.getSummary(), + completionPct, + contributors, + match + ); + } + private Map fetchCompletionPercentages(List docIds) { return transcriptionBlockQueryService.getCompletionStats(docIds); } diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentControllerTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentControllerTest.java index de2a79f3..f7c24541 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentControllerTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentControllerTest.java @@ -27,7 +27,6 @@ import org.springframework.security.test.context.support.WithMockUser; import org.springframework.test.context.bean.override.mockito.MockitoBean; import org.springframework.test.web.servlet.MockMvc; -import org.raddatz.familienarchiv.document.DocumentSearchItem; import org.raddatz.familienarchiv.document.SearchMatchData; import java.time.LocalDateTime; @@ -130,16 +129,13 @@ class DocumentControllerTest { @WithMockUser void search_responseBodyItemsContainMatchData() throws Exception { UUID docId = UUID.randomUUID(); - Document doc = Document.builder() - .id(docId) - .title("Brief an Anna") - .originalFilename("brief.pdf") - .status(DocumentStatus.UPLOADED) - .build(); var matchData = new SearchMatchData( "Er schrieb einen langen Brief", List.of(), false, List.of(), List.of(), List.of(), null, List.of()); when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any())) - .thenReturn(DocumentSearchResult.of(List.of(new DocumentSearchItem(doc, matchData, 0, List.of())))); + .thenReturn(DocumentSearchResult.of(List.of(new DocumentListItem( + docId, "Brief an Anna", "brief.pdf", null, null, null, + List.of(), List.of(), null, null, null, null, + 0, List.of(), matchData)))); mockMvc.perform(get("/api/documents/search").param("q", "Brief")) .andExpect(status().isOk()) @@ -148,6 +144,27 @@ class DocumentControllerTest { .value("Er schrieb einen langen Brief")); } + @Test + @WithMockUser + void search_returns_flat_item_with_id_and_without_sensitive_fields() throws Exception { + UUID docId = UUID.randomUUID(); + var matchData = new SearchMatchData(null, List.of(), false, List.of(), List.of(), List.of(), null, List.of()); + when(documentService.searchDocuments(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any())) + .thenReturn(DocumentSearchResult.of(List.of(new DocumentListItem( + docId, "Brief an Anna", "brief.pdf", null, null, null, + List.of(), List.of(), null, null, null, null, + 0, List.of(), matchData)))); + + mockMvc.perform(get("/api/documents/search")) + .andExpect(status().isOk()) + // flat id field present at top of item (not nested under $.items[0].document.id) + .andExpect(jsonPath("$.items[0].id").value(docId.toString())) + // sensitive storage fields must never appear in list response + .andExpect(jsonPath("$.items[0].transcription").doesNotExist()) + .andExpect(jsonPath("$.items[0].filePath").doesNotExist()) + .andExpect(jsonPath("$.items[0].fileHash").doesNotExist()); + } + // ─── /api/documents/search pagination ───────────────────────────────────── @Test diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentLazyLoadingTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentLazyLoadingTest.java index bc33815f..62a2d843 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentLazyLoadingTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentLazyLoadingTest.java @@ -127,7 +127,7 @@ class DocumentLazyLoadingTest { PageRequest.of(0, 20)); assertThat(result.totalElements()).isGreaterThan(0); assertThatCode(() -> - result.items().forEach(i -> i.document().getSender().getLastName())) + result.items().forEach(i -> { if (i.sender() != null) i.sender().getLastName(); })) .doesNotThrowAnyException(); } diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchPagedIntegrationTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchPagedIntegrationTest.java index 0ba61da3..c61c38af 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchPagedIntegrationTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchPagedIntegrationTest.java @@ -125,10 +125,10 @@ class DocumentSearchPagedIntegrationTest { // No document id should appear on both pages — slicing must be exclusive. var idsOnPage0 = page0.items().stream() - .map(item -> item.document().getId()) + .map(item -> item.id()) .toList(); var idsOnPage1 = page1.items().stream() - .map(item -> item.document().getId()) + .map(item -> item.id()) .toList(); for (UUID id : idsOnPage0) { assertThat(idsOnPage1).doesNotContain(id); diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchResultTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchResultTest.java index cbf44933..1dd09fed 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchResultTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentSearchResultTest.java @@ -3,8 +3,6 @@ package org.raddatz.familienarchiv.document; import io.swagger.v3.oas.annotations.media.Schema; import org.junit.jupiter.api.Test; import org.raddatz.familienarchiv.audit.ActivityActorDTO; -import org.raddatz.familienarchiv.document.Document; -import org.raddatz.familienarchiv.document.DocumentStatus; import org.springframework.data.domain.PageRequest; import java.util.List; @@ -14,14 +12,11 @@ import static org.assertj.core.api.Assertions.assertThat; class DocumentSearchResultTest { - private DocumentSearchItem item(UUID docId) { - Document doc = Document.builder() - .id(docId) - .title("Test") - .originalFilename("test.pdf") - .status(DocumentStatus.UPLOADED) - .build(); - return new DocumentSearchItem(doc, SearchMatchData.empty(), 0, List.of()); + private DocumentListItem item(UUID docId) { + return new DocumentListItem( + docId, "Test", "test.pdf", null, null, null, + List.of(), List.of(), null, null, null, null, + 0, List.of(), SearchMatchData.empty()); } @Test @@ -45,7 +40,7 @@ class DocumentSearchResultTest { @Test void paged_factory_populates_paging_fields_from_pageable_and_total() { - List slice = List.of(item(UUID.randomUUID()), item(UUID.randomUUID())); + List slice = List.of(item(UUID.randomUUID()), item(UUID.randomUUID())); DocumentSearchResult result = DocumentSearchResult.paged(slice, PageRequest.of(1, 50), 120L); @@ -68,9 +63,10 @@ class DocumentSearchResultTest { void of_exposes_items_with_completion_and_contributors() { UUID id = UUID.randomUUID(); ActivityActorDTO actor = new ActivityActorDTO("AB", "#f00", "Anna Braun"); - Document doc = Document.builder().id(id).title("T").originalFilename("t.pdf") - .status(DocumentStatus.UPLOADED).build(); - DocumentSearchItem item = new DocumentSearchItem(doc, SearchMatchData.empty(), 75, List.of(actor)); + DocumentListItem item = new DocumentListItem( + id, "T", "t.pdf", null, null, null, + List.of(), List.of(), null, null, null, null, + 75, List.of(actor), SearchMatchData.empty()); DocumentSearchResult result = DocumentSearchResult.of(List.of(item)); diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceSortTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceSortTest.java index f7c4564f..abf6e389 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceSortTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceSortTest.java @@ -70,7 +70,7 @@ class DocumentServiceSortTest { "Brief", null, null, null, null, null, null, null, DocumentSort.DATE, "DESC", null, PAGE); assertThat(result.items()).hasSize(2); - assertThat(result.items().get(0).document().getId()).isEqualTo(id2); // newer first + assertThat(result.items().get(0).id()).isEqualTo(id2); // newer first } // ─── RELEVANCE sort — pure text (no filters) ────────────────────────────── @@ -104,7 +104,7 @@ class DocumentServiceSortTest { DocumentSearchResult result = documentService.searchDocuments( "Brief", null, null, null, null, null, null, null, DocumentSort.RELEVANCE, null, null, PAGE); - assertThat(result.items().get(0).document().getId()).isEqualTo(id1); + assertThat(result.items().get(0).id()).isEqualTo(id1); } @Test @@ -121,7 +121,7 @@ class DocumentServiceSortTest { DocumentSearchResult result = documentService.searchDocuments( "Brief", null, null, null, null, null, null, null, null, null, null, PAGE); - assertThat(result.items().get(0).document().getId()).isEqualTo(id1); + assertThat(result.items().get(0).id()).isEqualTo(id1); } // ─── RELEVANCE sort — overflow guard ───────────────────────────────────── @@ -156,7 +156,7 @@ class DocumentServiceSortTest { DocumentSort.RELEVANCE, null, null, PAGE); assertThat(result.items()).hasSize(1); - assertThat(result.items().get(0).document().getId()).isEqualTo(uuidId); + assertThat(result.items().get(0).id()).isEqualTo(uuidId); } // ─── RELEVANCE sort — text + active filter ──────────────────────────────── diff --git a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java index 186fd2aa..8ef7a6f2 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/document/DocumentServiceTest.java @@ -11,7 +11,7 @@ import org.raddatz.familienarchiv.audit.AuditLogQueryService; import org.raddatz.familienarchiv.audit.AuditService; import org.raddatz.familienarchiv.document.annotation.AnnotationService; import org.raddatz.familienarchiv.document.transcription.TranscriptionBlockQueryService; -import org.raddatz.familienarchiv.document.DocumentSearchItem; +import org.raddatz.familienarchiv.document.DocumentListItem; import org.raddatz.familienarchiv.document.DocumentSearchResult; import org.raddatz.familienarchiv.document.DocumentSort; import org.raddatz.familienarchiv.document.DocumentUpdateDTO; @@ -1444,7 +1444,7 @@ class DocumentServiceTest { assertThat(result.totalPages()).isEqualTo(3); assertThat(result.items()).hasSize(50); // Page 1 (offset 50) under ascending sender sort should start at L050 - assertThat(result.items().get(0).document().getSender().getLastName()).isEqualTo("L050"); + assertThat(result.items().get(0).sender().getLastName()).isEqualTo("L050"); } @Test @@ -1565,7 +1565,7 @@ class DocumentServiceTest { null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null, UNPAGED); assertThat(result.items()).hasSize(2); - assertThat(result.items()).extracting(item -> item.document().getTitle()).containsExactly("Has Sender", "No Sender"); + assertThat(result.items()).extracting(DocumentListItem::title).containsExactly("Has Sender", "No Sender"); } // ─── searchDocuments — RECEIVER sort, empty receivers ─────────────────────── @@ -1584,7 +1584,7 @@ class DocumentServiceTest { DocumentSearchResult result = documentService.searchDocuments( null, null, null, null, null, null, null, null, DocumentSort.RECEIVER, "asc", null, UNPAGED); - assertThat(result.items()).extracting(item -> item.document().getTitle()) + assertThat(result.items()).extracting(DocumentListItem::title) .containsExactly("Has Receiver", "No Receivers"); } @@ -1607,7 +1607,7 @@ class DocumentServiceTest { null, null, null, null, null, null, null, null, DocumentSort.SENDER, "asc", null, UNPAGED); // null lastName should sort to end (treated as empty), not before "smith" (as "null") - assertThat(result.items()).extracting(item -> item.document().getTitle()) + assertThat(result.items()).extracting(DocumentListItem::title) .containsExactly("smith doc", "Null lastname doc"); }