From 0ec86220d3da98c045527af4c9ee095170366471 Mon Sep 17 00:00:00 2001 From: Marcel Date: Tue, 24 Mar 2026 17:32:29 +0100 Subject: [PATCH] feat(backend): add POST /api/admin/backfill-file-hashes endpoint - DocumentRepository: findByFileHashIsNullAndFilePathIsNotNull() - AnnotationRepository: findByDocumentIdAndFileHashIsNull() - FileService: downloadFileBytes() downloads raw bytes from S3 for hashing - AnnotationService: backfillAnnotationFileHashForDocument() sets hash on null-hash annotations - DocumentService: backfillFileHashes() iterates documents with null hash, downloads bytes, computes SHA-256, saves doc, then propagates hash to annotations - AdminController: POST /api/admin/backfill-file-hashes delegates to DocumentService Closes #56 Co-Authored-By: Claude Sonnet 4.6 --- .../controller/AdminController.java | 6 ++ .../repository/AnnotationRepository.java | 2 + .../repository/DocumentRepository.java | 2 + .../service/AnnotationService.java | 8 +++ .../service/DocumentService.java | 38 +++++++++++++ .../familienarchiv/service/FileService.java | 21 +++++++ .../controller/AdminControllerTest.java | 25 ++++++++ .../service/AnnotationServiceTest.java | 26 +++++++++ .../service/DocumentServiceTest.java | 57 +++++++++++++++++++ frontend/src/routes/admin/+page.svelte | 35 ++++++++++++ 10 files changed, 220 insertions(+) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/controller/AdminController.java b/backend/src/main/java/org/raddatz/familienarchiv/controller/AdminController.java index 0f6f5c14..918697cc 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/controller/AdminController.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/controller/AdminController.java @@ -41,4 +41,10 @@ public class AdminController { documentService.getDocumentsWithoutVersions()); return ResponseEntity.ok(new BackfillResult(count)); } + + @PostMapping("/backfill-file-hashes") + public ResponseEntity backfillFileHashes() { + int count = 
documentService.backfillFileHashes(); + return ResponseEntity.ok(new BackfillResult(count)); + } } diff --git a/backend/src/main/java/org/raddatz/familienarchiv/repository/AnnotationRepository.java b/backend/src/main/java/org/raddatz/familienarchiv/repository/AnnotationRepository.java index 66eb61e4..6e75fd59 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/repository/AnnotationRepository.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/repository/AnnotationRepository.java @@ -14,4 +14,6 @@ public interface AnnotationRepository extends JpaRepository findByDocumentIdAndPageNumber(UUID documentId, int pageNumber); Optional findByIdAndDocumentId(UUID id, UUID documentId); + + List findByDocumentIdAndFileHashIsNull(UUID documentId); } diff --git a/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java b/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java index d2d6047b..46969526 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/repository/DocumentRepository.java @@ -37,6 +37,8 @@ public interface DocumentRepository extends JpaRepository, JpaSp @Query("SELECT d FROM Document d WHERE d.id NOT IN (SELECT DISTINCT dv.documentId FROM DocumentVersion dv)") List findDocumentsWithoutVersions(); + List findByFileHashIsNullAndFilePathIsNotNull(); + @Query("SELECT DISTINCT d FROM Document d " + "JOIN d.receivers r " + "WHERE " + diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/AnnotationService.java b/backend/src/main/java/org/raddatz/familienarchiv/service/AnnotationService.java index f6f0687a..f52c70b0 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/AnnotationService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/AnnotationService.java @@ -62,6 +62,14 @@ public class AnnotationService { annotationRepository.delete(annotation); } + 
@Transactional + public void backfillAnnotationFileHashForDocument(UUID documentId, String fileHash) { + annotationRepository.findByDocumentIdAndFileHashIsNull(documentId).forEach(a -> { + a.setFileHash(fileHash); + annotationRepository.save(a); + }); + } + // ─── private helpers ────────────────────────────────────────────────────── private boolean overlaps(DocumentAnnotation existing, CreateAnnotationDTO dto) { diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java index 919211df..a79a8f22 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/DocumentService.java @@ -18,6 +18,8 @@ import org.springframework.transaction.annotation.Transactional; import org.springframework.web.multipart.MultipartFile; import java.io.IOException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.time.LocalDate; import java.util.ArrayList; import java.util.Arrays; @@ -38,6 +40,7 @@ public class DocumentService { private final FileService fileService; private final TagService tagService; private final DocumentVersionService documentVersionService; + private final AnnotationService annotationService; /** * Lädt eine Datei hoch. 
@@ -282,4 +285,56 @@ public class DocumentService {
         });
         tagService.delete(tagId);
     }
+
+    /**
+     * Backfills the SHA-256 file hash of every document that has a file path but no hash yet,
+     * and propagates each hash to that document's annotations that still lack one.
+     *
+     * <p>Deliberately not {@code @Transactional}. Failures are handled per document, but an
+     * exception thrown out of a transactional call that joined an outer transaction (such as
+     * {@link AnnotationService#backfillAnnotationFileHashForDocument}) marks that transaction
+     * rollback-only. Swallowing it in the catch below would then make the final commit fail and
+     * discard every hash written during the run.
+     *
+     * <p>Annotations are updated before the document is saved: the document's null hash is what
+     * selects it for backfill, so it is only filled in once its annotations are done. If anything
+     * fails in between, the document is picked up again on the next run.
+     *
+     * @return the number of documents whose hash was backfilled
+     */
+    public int backfillFileHashes() {
+        List<Document> docs = documentRepository.findByFileHashIsNullAndFilePathIsNotNull();
+        int count = 0;
+        for (Document doc : docs) {
+            try {
+                byte[] bytes = fileService.downloadFileBytes(doc.getFilePath());
+                String hash = sha256Hex(bytes);
+                annotationService.backfillAnnotationFileHashForDocument(doc.getId(), hash);
+                doc.setFileHash(hash);
+                documentRepository.save(doc);
+                count++;
+            } catch (Exception e) {
+                // Best effort per document; the trailing throwable argument logs the stack trace.
+                log.warn("Failed to backfill hash for document {}: {}", doc.getId(), e.getMessage(), e);
+            }
+        }
+        return count;
+    }
+
+    // ─── private helpers ──────────────────────────────────────────────────────
+
+    /** Returns the lowercase hexadecimal SHA-256 digest of {@code bytes} (64 characters). */
+    private static String sha256Hex(byte[] bytes) {
+        try {
+            MessageDigest digest = MessageDigest.getInstance("SHA-256");
+            byte[] hash = digest.digest(bytes);
+            StringBuilder sb = new StringBuilder(64);
+            for (byte b : hash) {
+                sb.append(String.format("%02x", b));
+            }
+            return sb.toString();
+        } catch (NoSuchAlgorithmException e) {
+            throw new IllegalStateException("SHA-256 not available", e);
+        }
+    }
 }
diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/FileService.java b/backend/src/main/java/org/raddatz/familienarchiv/service/FileService.java
index f2142fdc..57e225c6 100644
--- a/backend/src/main/java/org/raddatz/familienarchiv/service/FileService.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/service/FileService.java
@@ -13,6 +13,7 @@ import org.springframework.web.multipart.MultipartFile;
 import org.springframework.core.io.InputStreamResource;
 
 import java.io.IOException;
+import java.io.InputStream;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
 import java.util.UUID;
@@ -85,6 +86,26 @@ public class FileService {
         }
     }
 
+    /**
+     * Downloads a file from S3/MinIO and returns its raw bytes.
+ * Used for hash backfill — callers are responsible for not calling this on large files unnecessarily. + */ + public byte[] downloadFileBytes(String s3Key) throws IOException { + try { + GetObjectRequest getObjectRequest = GetObjectRequest.builder() + .bucket(bucketName) + .key(s3Key) + .build(); + try (InputStream in = s3Client.getObject(getObjectRequest)) { + return in.readAllBytes(); + } + } catch (NoSuchKeyException e) { + throw new StorageFileNotFoundException("File not found in storage: " + s3Key); + } catch (S3Exception e) { + throw new IOException("Failed to download file from storage: " + e.getMessage(), e); + } + } + // ─── private helpers ────────────────────────────────────────────────────── private static String sha256Hex(byte[] bytes) { diff --git a/backend/src/test/java/org/raddatz/familienarchiv/controller/AdminControllerTest.java b/backend/src/test/java/org/raddatz/familienarchiv/controller/AdminControllerTest.java index b37eb3d7..a456183b 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/controller/AdminControllerTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/controller/AdminControllerTest.java @@ -58,4 +58,29 @@ class AdminControllerTest { .andExpect(status().isOk()) .andExpect(jsonPath("$.count").value(1)); } + + // ─── POST /api/admin/backfill-file-hashes ────────────────────────────────── + + @Test + void backfillFileHashes_returns401_whenUnauthenticated() throws Exception { + mockMvc.perform(post("/api/admin/backfill-file-hashes")) + .andExpect(status().isUnauthorized()); + } + + @Test + @WithMockUser(roles = "USER") + void backfillFileHashes_returns403_whenNotAdmin() throws Exception { + mockMvc.perform(post("/api/admin/backfill-file-hashes")) + .andExpect(status().isForbidden()); + } + + @Test + @WithMockUser(authorities = "ADMIN") + void backfillFileHashes_returns200_withCount_whenAdmin() throws Exception { + when(documentService.backfillFileHashes()).thenReturn(3); + + 
mockMvc.perform(post("/api/admin/backfill-file-hashes")) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.count").value(3)); + } } diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/AnnotationServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/AnnotationServiceTest.java index 33019c9d..6337052a 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/service/AnnotationServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/service/AnnotationServiceTest.java @@ -157,4 +157,30 @@ class AnnotationServiceTest { assertThat(annotationService.listAnnotations(docId)).containsExactly(a); } + + // ─── backfillAnnotationFileHashForDocument ──────────────────────────────── + + @Test + void backfillAnnotationFileHashForDocument_setsHashOnAnnotationsWithNullHash() { + UUID docId = UUID.randomUUID(); + String hash = "abc123"; + DocumentAnnotation a = DocumentAnnotation.builder() + .id(UUID.randomUUID()).documentId(docId).build(); + when(annotationRepository.findByDocumentIdAndFileHashIsNull(docId)).thenReturn(List.of(a)); + + annotationService.backfillAnnotationFileHashForDocument(docId, hash); + + assertThat(a.getFileHash()).isEqualTo(hash); + verify(annotationRepository).save(a); + } + + @Test + void backfillAnnotationFileHashForDocument_doesNothingWhenNoAnnotations() { + UUID docId = UUID.randomUUID(); + when(annotationRepository.findByDocumentIdAndFileHashIsNull(docId)).thenReturn(List.of()); + + annotationService.backfillAnnotationFileHashForDocument(docId, "hash"); + + verify(annotationRepository, never()).save(any()); + } } diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java index 1c8c413b..b6fc3dea 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/service/DocumentServiceTest.java 
@@ -21,6 +21,7 @@ import java.util.UUID; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.*; @ExtendWith(MockitoExtension.class) @@ -31,6 +32,7 @@ class DocumentServiceTest { @Mock FileService fileService; @Mock TagService tagService; @Mock DocumentVersionService documentVersionService; + @Mock AnnotationService annotationService; @InjectMocks DocumentService documentService; // ─── getDocumentById ────────────────────────────────────────────────────── @@ -209,4 +211,59 @@ class DocumentServiceTest { verify(documentVersionService).recordVersion(any(Document.class)); } + + // ─── backfillFileHashes ─────────────────────────────────────────────────── + + @Test + void backfillFileHashes_skipsDocumentsWithNoFilePath() throws Exception { + Document noFile = Document.builder().id(UUID.randomUUID()).build(); + when(documentRepository.findByFileHashIsNullAndFilePathIsNotNull()).thenReturn(List.of()); + + int count = documentService.backfillFileHashes(); + + assertThat(count).isZero(); + verify(fileService, never()).downloadFileBytes(any()); + } + + @Test + void backfillFileHashes_computesHashAndSavesDocument() throws Exception { + UUID docId = UUID.randomUUID(); + Document doc = Document.builder().id(docId).filePath("documents/scan.pdf").build(); + when(documentRepository.findByFileHashIsNullAndFilePathIsNotNull()).thenReturn(List.of(doc)); + when(fileService.downloadFileBytes("documents/scan.pdf")).thenReturn(new byte[]{1, 2, 3}); + when(documentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0)); + + documentService.backfillFileHashes(); + + assertThat(doc.getFileHash()).isNotNull().hasSize(64); + verify(documentRepository).save(doc); + } + + @Test + void backfillFileHashes_propagatesHashToAnnotations() throws Exception { + UUID docId = UUID.randomUUID(); + 
Document doc = Document.builder().id(docId).filePath("documents/scan.pdf").build(); + when(documentRepository.findByFileHashIsNullAndFilePathIsNotNull()).thenReturn(List.of(doc)); + when(fileService.downloadFileBytes("documents/scan.pdf")).thenReturn(new byte[]{1, 2, 3}); + when(documentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0)); + + documentService.backfillFileHashes(); + + verify(annotationService).backfillAnnotationFileHashForDocument(eq(docId), any()); + } + + @Test + void backfillFileHashes_returnsCountOfUpdatedDocuments() throws Exception { + UUID id1 = UUID.randomUUID(); + UUID id2 = UUID.randomUUID(); + Document doc1 = Document.builder().id(id1).filePath("documents/a.pdf").build(); + Document doc2 = Document.builder().id(id2).filePath("documents/b.pdf").build(); + when(documentRepository.findByFileHashIsNullAndFilePathIsNotNull()).thenReturn(List.of(doc1, doc2)); + when(fileService.downloadFileBytes(any())).thenReturn(new byte[]{1}); + when(documentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0)); + + int count = documentService.backfillFileHashes(); + + assertThat(count).isEqualTo(2); + } } diff --git a/frontend/src/routes/admin/+page.svelte b/frontend/src/routes/admin/+page.svelte index fdfc50a3..89ec4cff 100644 --- a/frontend/src/routes/admin/+page.svelte +++ b/frontend/src/routes/admin/+page.svelte @@ -11,6 +11,8 @@ let editingTagName = $state(''); let editingGroupId: string | null = $state(null); let backfillResult: number | null = $state(null); let backfillLoading = $state(false); +let backfillHashesResult: number | null = $state(null); +let backfillHashesLoading = $state(false); const availablePermissions = ['WRITE_ALL', 'ADMIN', 'ADMIN_USER', 'ADMIN_TAG', 'ADMIN_PERMISSION']; @@ -45,6 +47,20 @@ async function backfillVersions() { backfillLoading = false; } } + +async function backfillFileHashes() { + backfillHashesLoading = true; + backfillHashesResult = null; + try { + const res = await 
fetch('/api/admin/backfill-file-hashes', { method: 'POST' }); + if (res.ok) { + const data = await res.json(); + backfillHashesResult = data.count; + } + } finally { + backfillHashesLoading = false; + } +}
@@ -535,5 +551,24 @@ async function backfillVersions() {

{/if}
+ +
+

+ {m.admin_system_backfill_hashes_heading()} +

+

{m.admin_system_backfill_hashes_description()}

+ + {#if backfillHashesResult !== null} +

+ {m.admin_system_backfill_hashes_success({ count: backfillHashesResult })} +

+ {/if} +
{/if}