feat(backend): add POST /api/admin/backfill-file-hashes endpoint

- DocumentRepository: findByFileHashIsNullAndFilePathIsNotNull()
- AnnotationRepository: findByDocumentIdAndFileHashIsNull()
- FileService: downloadFileBytes() downloads raw bytes from S3 for hashing
- AnnotationService: backfillAnnotationFileHashForDocument() sets hash on null-hash annotations
- DocumentService: backfillFileHashes() iterates documents with null hash,
  downloads bytes, computes SHA-256, saves doc, then propagates hash to annotations
- AdminController: POST /api/admin/backfill-file-hashes delegates to DocumentService

Closes #56

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-03-24 17:32:29 +01:00
parent 7fbc33b32d
commit 0ec86220d3
10 changed files with 220 additions and 0 deletions

View File

@@ -41,4 +41,10 @@ public class AdminController {
documentService.getDocumentsWithoutVersions());
return ResponseEntity.ok(new BackfillResult(count));
}
/**
 * Handles {@code POST .../backfill-file-hashes}.
 *
 * <p>Runs {@code documentService.backfillFileHashes()} and reports the number it
 * returns, wrapped in a {@code BackfillResult}, with HTTP 200.
 *
 * @return 200 OK carrying the count returned by the document service
 */
@PostMapping("/backfill-file-hashes")
public ResponseEntity<BackfillResult> backfillFileHashes() {
    BackfillResult result = new BackfillResult(documentService.backfillFileHashes());
    return ResponseEntity.ok(result);
}
}

View File

@@ -14,4 +14,6 @@ public interface AnnotationRepository extends JpaRepository<DocumentAnnotation,
/** Finder keyed on document id and page number (query presumably derived from the method name by Spring Data). */
List<DocumentAnnotation> findByDocumentIdAndPageNumber(UUID documentId, int pageNumber);
/** Looks up a single annotation by its own id together with its document id; empty when none matches. */
Optional<DocumentAnnotation> findByIdAndDocumentId(UUID id, UUID documentId);
/** Annotations of the given document whose file hash is still null; used by the file-hash backfill. */
List<DocumentAnnotation> findByDocumentIdAndFileHashIsNull(UUID documentId);
}

View File

@@ -37,6 +37,8 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
/** Returns every document whose id does not appear as {@code documentId} on any {@code DocumentVersion}. */
@Query("SELECT d FROM Document d WHERE d.id NOT IN (SELECT DISTINCT dv.documentId FROM DocumentVersion dv)")
List<Document> findDocumentsWithoutVersions();
/**
 * Documents that have a file path but no file hash yet, i.e. the candidates for the hash backfill.
 * NOTE(review): loads all matches into one list; confirm the expected volume is small enough.
 */
List<Document> findByFileHashIsNullAndFilePathIsNotNull();
@Query("SELECT DISTINCT d FROM Document d " +
"JOIN d.receivers r " +
"WHERE " +

View File

@@ -62,6 +62,14 @@ public class AnnotationService {
annotationRepository.delete(annotation);
}
/**
 * Stamps {@code fileHash} onto every annotation of the given document that does not
 * have a file hash yet, saving each annotation individually.
 *
 * @param documentId id of the document whose annotations are updated
 * @param fileHash   hash value to store on each null-hash annotation
 */
@Transactional
public void backfillAnnotationFileHashForDocument(UUID documentId, String fileHash) {
    for (DocumentAnnotation annotation : annotationRepository.findByDocumentIdAndFileHashIsNull(documentId)) {
        annotation.setFileHash(fileHash);
        annotationRepository.save(annotation);
    }
}
// ─── private helpers ──────────────────────────────────────────────────────
private boolean overlaps(DocumentAnnotation existing, CreateAnnotationDTO dto) {

View File

@@ -18,6 +18,8 @@ import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.Arrays;
@@ -38,6 +40,7 @@ public class DocumentService {
private final FileService fileService;
private final TagService tagService;
private final DocumentVersionService documentVersionService;
private final AnnotationService annotationService;
/**
* Lädt eine Datei hoch.
@@ -282,4 +285,39 @@ public class DocumentService {
});
tagService.delete(tagId);
}
/**
 * Computes and stores the SHA-256 file hash for every document that has a file path
 * but no hash yet, then propagates the hash to that document's null-hash annotations.
 *
 * <p>Failures are handled per document: the error is logged (including the stack
 * trace) and the loop continues with the next document, so one missing or unreadable
 * file does not abort the whole run.
 *
 * <p>NOTE(review): the whole loop runs inside a single transaction. If a nested
 * transactional call fails it may mark that shared transaction rollback-only even
 * though the exception is caught here — confirm whether per-document transactions are
 * wanted. Each file is also read fully into memory before hashing.
 *
 * @return number of documents whose hash was computed and saved
 */
@Transactional
public int backfillFileHashes() {
    List<Document> docs = documentRepository.findByFileHashIsNullAndFilePathIsNotNull();
    int count = 0;
    for (Document doc : docs) {
        try {
            byte[] bytes = fileService.downloadFileBytes(doc.getFilePath());
            String hash = sha256Hex(bytes);
            doc.setFileHash(hash);
            documentRepository.save(doc);
            annotationService.backfillAnnotationFileHashForDocument(doc.getId(), hash);
            count++;
        } catch (Exception e) {
            // Best effort: keep going, but pass the exception as the trailing argument so the
            // logger records its type and stack trace instead of only the (possibly null) message.
            log.warn("Failed to backfill hash for document {}: {}", doc.getId(), e.getMessage(), e);
        }
    }
    return count;
}
// ─── private helpers ──────────────────────────────────────────────────────
/**
 * Returns the SHA-256 digest of {@code bytes} as a 64-character lowercase hex string.
 *
 * @throws IllegalStateException if the JVM offers no SHA-256 implementation
 */
private static String sha256Hex(byte[] bytes) {
    final MessageDigest sha256;
    try {
        sha256 = MessageDigest.getInstance("SHA-256");
    } catch (NoSuchAlgorithmException e) {
        throw new IllegalStateException("SHA-256 not available", e);
    }
    byte[] digestBytes = sha256.digest(bytes);
    char[] hex = new char[digestBytes.length * 2];
    int pos = 0;
    for (byte b : digestBytes) {
        // High nibble first, then low nibble; forDigit yields lowercase a-f.
        hex[pos++] = Character.forDigit((b >> 4) & 0xF, 16);
        hex[pos++] = Character.forDigit(b & 0xF, 16);
    }
    return new String(hex);
}
}

View File

@@ -13,6 +13,7 @@ import org.springframework.web.multipart.MultipartFile;
import org.springframework.core.io.InputStreamResource;
import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.UUID;
@@ -85,6 +86,26 @@ public class FileService {
}
}
/**
 * Fetches the object stored under {@code s3Key} in the configured bucket and returns
 * its complete content as a byte array.
 *
 * <p>The whole object is read into memory, so this is intended for the hash backfill
 * only; callers are responsible for not using it on large files unnecessarily.
 *
 * @param s3Key object key inside the bucket
 * @return the raw bytes of the stored object
 * @throws StorageFileNotFoundException if the storage reports that the key does not exist
 * @throws IOException if reading the stream fails or the storage reports another S3 error
 */
public byte[] downloadFileBytes(String s3Key) throws IOException {
    try (InputStream objectStream = s3Client.getObject(
            GetObjectRequest.builder()
                    .bucket(bucketName)
                    .key(s3Key)
                    .build())) {
        return objectStream.readAllBytes();
    } catch (NoSuchKeyException e) {
        // Must come before the S3Exception handler so the missing-key case keeps its own type.
        throw new StorageFileNotFoundException("File not found in storage: " + s3Key);
    } catch (S3Exception e) {
        throw new IOException("Failed to download file from storage: " + e.getMessage(), e);
    }
}
// ─── private helpers ──────────────────────────────────────────────────────
private static String sha256Hex(byte[] bytes) {

View File

@@ -58,4 +58,29 @@ class AdminControllerTest {
.andExpect(status().isOk())
.andExpect(jsonPath("$.count").value(1));
}
// ─── POST /api/admin/backfill-file-hashes ──────────────────────────────────
@Test
void backfillFileHashes_returns401_whenUnauthenticated() throws Exception {
    // No mock user is configured, so the request is sent unauthenticated.
    String endpoint = "/api/admin/backfill-file-hashes";

    mockMvc.perform(post(endpoint))
            .andExpect(status().isUnauthorized());
}
@Test
@WithMockUser(roles = "USER")
void backfillFileHashes_returns403_whenNotAdmin() throws Exception {
    // Authenticated, but only with the USER role: the endpoint must reject the call.
    String endpoint = "/api/admin/backfill-file-hashes";

    mockMvc.perform(post(endpoint))
            .andExpect(status().isForbidden());
}
@Test
@WithMockUser(authorities = "ADMIN")
void backfillFileHashes_returns200_withCount_whenAdmin() throws Exception {
    // The service reports three updated documents; the controller must echo that count.
    int updatedDocuments = 3;
    when(documentService.backfillFileHashes()).thenReturn(updatedDocuments);

    mockMvc.perform(post("/api/admin/backfill-file-hashes"))
            .andExpect(status().isOk())
            .andExpect(jsonPath("$.count").value(updatedDocuments));
}
}

View File

@@ -157,4 +157,30 @@ class AnnotationServiceTest {
assertThat(annotationService.listAnnotations(docId)).containsExactly(a);
}
// ─── backfillAnnotationFileHashForDocument ────────────────────────────────
@Test
void backfillAnnotationFileHashForDocument_setsHashOnAnnotationsWithNullHash() {
    // Given: one annotation of the document without a file hash
    UUID documentId = UUID.randomUUID();
    DocumentAnnotation unhashed = DocumentAnnotation.builder()
            .id(UUID.randomUUID())
            .documentId(documentId)
            .build();
    when(annotationRepository.findByDocumentIdAndFileHashIsNull(documentId))
            .thenReturn(List.of(unhashed));

    // When
    String expectedHash = "abc123";
    annotationService.backfillAnnotationFileHashForDocument(documentId, expectedHash);

    // Then: the hash is set on the entity and the entity is saved
    assertThat(unhashed.getFileHash()).isEqualTo(expectedHash);
    verify(annotationRepository).save(unhashed);
}
@Test
void backfillAnnotationFileHashForDocument_doesNothingWhenNoAnnotations() {
    // Given: the repository finds no null-hash annotations for the document
    UUID documentId = UUID.randomUUID();
    when(annotationRepository.findByDocumentIdAndFileHashIsNull(documentId))
            .thenReturn(List.of());

    // When
    annotationService.backfillAnnotationFileHashForDocument(documentId, "hash");

    // Then: nothing is written
    verify(annotationRepository, never()).save(any());
}
}

View File

@@ -21,6 +21,7 @@ import java.util.UUID;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.*;
@ExtendWith(MockitoExtension.class)
@@ -31,6 +32,7 @@ class DocumentServiceTest {
@Mock FileService fileService;
@Mock TagService tagService;
@Mock DocumentVersionService documentVersionService;
@Mock AnnotationService annotationService;
@InjectMocks DocumentService documentService;
// ─── getDocumentById ──────────────────────────────────────────────────────
@@ -209,4 +211,59 @@ class DocumentServiceTest {
verify(documentVersionService).recordVersion(any(Document.class));
}
// ─── backfillFileHashes ───────────────────────────────────────────────────
@Test
void backfillFileHashes_skipsDocumentsWithNoFilePath() throws Exception {
    // Documents without a file path are excluded by the repository query itself, so from
    // the service's point of view the candidate list is simply empty. (The unused
    // path-less Document fixture was removed: it never reached the code under test.)
    when(documentRepository.findByFileHashIsNullAndFilePathIsNotNull()).thenReturn(List.of());

    int count = documentService.backfillFileHashes();

    // Nothing to process: zero updates and no storage access at all.
    assertThat(count).isZero();
    verify(fileService, never()).downloadFileBytes(any());
}
@Test
void backfillFileHashes_computesHashAndSavesDocument() throws Exception {
    // Given: a single document with a stored file and no hash
    String storedPath = "documents/scan.pdf";
    Document candidate = Document.builder()
            .id(UUID.randomUUID())
            .filePath(storedPath)
            .build();
    when(documentRepository.findByFileHashIsNullAndFilePathIsNotNull()).thenReturn(List.of(candidate));
    when(fileService.downloadFileBytes(storedPath)).thenReturn(new byte[]{1, 2, 3});
    when(documentRepository.save(any())).thenAnswer(invocation -> invocation.getArgument(0));

    // When
    documentService.backfillFileHashes();

    // Then: a 64-character hash is set and the document is saved
    String storedHash = candidate.getFileHash();
    assertThat(storedHash).isNotNull();
    assertThat(storedHash).hasSize(64);
    verify(documentRepository).save(candidate);
}
@Test
void backfillFileHashes_propagatesHashToAnnotations() throws Exception {
    // Given: a single document with a stored file and no hash
    UUID documentId = UUID.randomUUID();
    String storedPath = "documents/scan.pdf";
    Document candidate = Document.builder()
            .id(documentId)
            .filePath(storedPath)
            .build();
    when(documentRepository.findByFileHashIsNullAndFilePathIsNotNull()).thenReturn(List.of(candidate));
    when(fileService.downloadFileBytes(storedPath)).thenReturn(new byte[]{1, 2, 3});
    when(documentRepository.save(any())).thenAnswer(invocation -> invocation.getArgument(0));

    // When
    documentService.backfillFileHashes();

    // Then: the annotation service is asked to backfill this document's annotations
    verify(annotationService).backfillAnnotationFileHashForDocument(eq(documentId), any());
}
@Test
void backfillFileHashes_returnsCountOfUpdatedDocuments() throws Exception {
    // Given: two candidate documents, both downloadable
    Document first = Document.builder()
            .id(UUID.randomUUID())
            .filePath("documents/a.pdf")
            .build();
    Document second = Document.builder()
            .id(UUID.randomUUID())
            .filePath("documents/b.pdf")
            .build();
    when(documentRepository.findByFileHashIsNullAndFilePathIsNotNull())
            .thenReturn(List.of(first, second));
    when(fileService.downloadFileBytes(any())).thenReturn(new byte[]{1});
    when(documentRepository.save(any())).thenAnswer(invocation -> invocation.getArgument(0));

    // When
    int updated = documentService.backfillFileHashes();

    // Then: both documents are counted
    assertThat(updated).isEqualTo(2);
}
}

View File

@@ -11,6 +11,8 @@ let editingTagName = $state('');
let editingGroupId: string | null = $state(null);
let backfillResult: number | null = $state(null);
let backfillLoading = $state(false);
// Count returned by the last successful file-hash backfill; null until one succeeds (reset to null when a run starts).
let backfillHashesResult: number | null = $state(null);
// True while a file-hash backfill request is in flight; disables the trigger button.
let backfillHashesLoading = $state(false);
const availablePermissions = ['WRITE_ALL', 'ADMIN', 'ADMIN_USER', 'ADMIN_TAG', 'ADMIN_PERMISSION'];
@@ -45,6 +47,20 @@ async function backfillVersions() {
backfillLoading = false;
}
}
/**
 * Triggers the server-side file-hash backfill and stores the reported count.
 *
 * Clears the previous result, marks the request as in flight, and always clears the
 * in-flight flag afterwards. A non-OK response leaves the result at null.
 */
async function backfillFileHashes() {
	backfillHashesLoading = true;
	backfillHashesResult = null;
	try {
		const response = await fetch('/api/admin/backfill-file-hashes', { method: 'POST' });
		if (!response.ok) {
			return;
		}
		const payload = await response.json();
		backfillHashesResult = payload.count;
	} finally {
		backfillHashesLoading = false;
	}
}
</script>
<div class="mx-auto max-w-7xl py-8 font-sans sm:px-6 lg:px-8">
@@ -535,5 +551,24 @@ async function backfillVersions() {
</p>
{/if}
</div>
<!-- File-hash backfill card: the button runs backfillFileHashes, is disabled and shows "…" while the request is in flight; the success line appears once a count has been received. -->
<div class="mt-4 rounded-sm border border-brand-sand bg-white p-6 shadow-sm">
<h2 class="mb-1 text-lg font-bold text-gray-700">
{m.admin_system_backfill_hashes_heading()}
</h2>
<p class="mb-4 text-sm text-gray-500">{m.admin_system_backfill_hashes_description()}</p>
<button
onclick={backfillFileHashes}
disabled={backfillHashesLoading}
class="rounded bg-brand-navy px-6 py-2 text-sm font-bold text-white uppercase transition hover:bg-brand-mint hover:text-brand-navy disabled:cursor-not-allowed disabled:opacity-50"
>
{backfillHashesLoading ? '…' : m.admin_system_backfill_hashes_btn()}
</button>
{#if backfillHashesResult !== null}
<p class="mt-4 text-sm font-medium text-brand-navy">
{m.admin_system_backfill_hashes_success({ count: backfillHashesResult })}
</p>
{/if}
</div>
{/if}
</div>