feat(backend): add POST /api/admin/backfill-file-hashes endpoint
- DocumentRepository: findByFileHashIsNullAndFilePathIsNotNull()
- AnnotationRepository: findByDocumentIdAndFileHashIsNull()
- FileService: downloadFileBytes() downloads raw bytes from S3 for hashing
- AnnotationService: backfillAnnotationFileHashForDocument() sets hash on null-hash annotations
- DocumentService: backfillFileHashes() iterates documents with null hash, downloads bytes, computes SHA-256, saves doc, then propagates hash to annotations
- AdminController: POST /api/admin/backfill-file-hashes delegates to DocumentService

Closes #56

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -41,4 +41,10 @@ public class AdminController {
|
||||
documentService.getDocumentsWithoutVersions());
|
||||
return ResponseEntity.ok(new BackfillResult(count));
|
||||
}
|
||||
|
||||
@PostMapping("/backfill-file-hashes")
|
||||
public ResponseEntity<BackfillResult> backfillFileHashes() {
|
||||
int count = documentService.backfillFileHashes();
|
||||
return ResponseEntity.ok(new BackfillResult(count));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,4 +14,6 @@ public interface AnnotationRepository extends JpaRepository<DocumentAnnotation,
|
||||
List<DocumentAnnotation> findByDocumentIdAndPageNumber(UUID documentId, int pageNumber);
|
||||
|
||||
Optional<DocumentAnnotation> findByIdAndDocumentId(UUID id, UUID documentId);
|
||||
|
||||
/**
 * Looks up the annotations of one document that have no file hash yet.
 * No explicit query is attached, so the query is expected to be derived by
 * Spring Data from the method name.
 *
 * @param documentId id of the document whose annotations are searched
 * @return the annotations of that document whose file hash is null
 */
List<DocumentAnnotation> findByDocumentIdAndFileHashIsNull(UUID documentId);
|
||||
}
|
||||
|
||||
@@ -37,6 +37,8 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
|
||||
@Query("SELECT d FROM Document d WHERE d.id NOT IN (SELECT DISTINCT dv.documentId FROM DocumentVersion dv)")
|
||||
List<Document> findDocumentsWithoutVersions();
|
||||
|
||||
/**
 * Looks up the documents that have a file path but no file hash yet.
 * No explicit query is attached, so the query is expected to be derived by
 * Spring Data from the method name.
 *
 * @return the documents whose file hash is null and whose file path is not null
 */
List<Document> findByFileHashIsNullAndFilePathIsNotNull();
|
||||
|
||||
@Query("SELECT DISTINCT d FROM Document d " +
|
||||
"JOIN d.receivers r " +
|
||||
"WHERE " +
|
||||
|
||||
@@ -62,6 +62,14 @@ public class AnnotationService {
|
||||
annotationRepository.delete(annotation);
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public void backfillAnnotationFileHashForDocument(UUID documentId, String fileHash) {
|
||||
annotationRepository.findByDocumentIdAndFileHashIsNull(documentId).forEach(a -> {
|
||||
a.setFileHash(fileHash);
|
||||
annotationRepository.save(a);
|
||||
});
|
||||
}
|
||||
|
||||
// ─── private helpers ──────────────────────────────────────────────────────
|
||||
|
||||
private boolean overlaps(DocumentAnnotation existing, CreateAnnotationDTO dto) {
|
||||
|
||||
@@ -18,6 +18,8 @@ import org.springframework.transaction.annotation.Transactional;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.time.LocalDate;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
@@ -38,6 +40,7 @@ public class DocumentService {
|
||||
private final FileService fileService;
|
||||
private final TagService tagService;
|
||||
private final DocumentVersionService documentVersionService;
|
||||
private final AnnotationService annotationService;
|
||||
|
||||
/**
|
||||
* Lädt eine Datei hoch.
|
||||
@@ -282,4 +285,39 @@ public class DocumentService {
|
||||
});
|
||||
tagService.delete(tagId);
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public int backfillFileHashes() {
|
||||
List<Document> docs = documentRepository.findByFileHashIsNullAndFilePathIsNotNull();
|
||||
int count = 0;
|
||||
for (Document doc : docs) {
|
||||
try {
|
||||
byte[] bytes = fileService.downloadFileBytes(doc.getFilePath());
|
||||
String hash = sha256Hex(bytes);
|
||||
doc.setFileHash(hash);
|
||||
documentRepository.save(doc);
|
||||
annotationService.backfillAnnotationFileHashForDocument(doc.getId(), hash);
|
||||
count++;
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to backfill hash for document {}: {}", doc.getId(), e.getMessage());
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
// ─── private helpers ──────────────────────────────────────────────────────
|
||||
|
||||
private static String sha256Hex(byte[] bytes) {
|
||||
try {
|
||||
MessageDigest digest = MessageDigest.getInstance("SHA-256");
|
||||
byte[] hash = digest.digest(bytes);
|
||||
StringBuilder sb = new StringBuilder(64);
|
||||
for (byte b : hash) {
|
||||
sb.append(String.format("%02x", b));
|
||||
}
|
||||
return sb.toString();
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
throw new IllegalStateException("SHA-256 not available", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.core.io.InputStreamResource;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.UUID;
|
||||
@@ -85,6 +86,26 @@ public class FileService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Downloads a file from S3/MinIO and returns its raw bytes.
|
||||
* Used for hash backfill — callers are responsible for not calling this on large files unnecessarily.
|
||||
*/
|
||||
public byte[] downloadFileBytes(String s3Key) throws IOException {
|
||||
try {
|
||||
GetObjectRequest getObjectRequest = GetObjectRequest.builder()
|
||||
.bucket(bucketName)
|
||||
.key(s3Key)
|
||||
.build();
|
||||
try (InputStream in = s3Client.getObject(getObjectRequest)) {
|
||||
return in.readAllBytes();
|
||||
}
|
||||
} catch (NoSuchKeyException e) {
|
||||
throw new StorageFileNotFoundException("File not found in storage: " + s3Key);
|
||||
} catch (S3Exception e) {
|
||||
throw new IOException("Failed to download file from storage: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
// ─── private helpers ──────────────────────────────────────────────────────
|
||||
|
||||
private static String sha256Hex(byte[] bytes) {
|
||||
|
||||
@@ -58,4 +58,29 @@ class AdminControllerTest {
|
||||
.andExpect(status().isOk())
|
||||
.andExpect(jsonPath("$.count").value(1));
|
||||
}
|
||||
|
||||
// ─── POST /api/admin/backfill-file-hashes ──────────────────────────────────
|
||||
|
||||
@Test
|
||||
void backfillFileHashes_returns401_whenUnauthenticated() throws Exception {
|
||||
mockMvc.perform(post("/api/admin/backfill-file-hashes"))
|
||||
.andExpect(status().isUnauthorized());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(roles = "USER")
|
||||
void backfillFileHashes_returns403_whenNotAdmin() throws Exception {
|
||||
mockMvc.perform(post("/api/admin/backfill-file-hashes"))
|
||||
.andExpect(status().isForbidden());
|
||||
}
|
||||
|
||||
@Test
|
||||
@WithMockUser(authorities = "ADMIN")
|
||||
void backfillFileHashes_returns200_withCount_whenAdmin() throws Exception {
|
||||
when(documentService.backfillFileHashes()).thenReturn(3);
|
||||
|
||||
mockMvc.perform(post("/api/admin/backfill-file-hashes"))
|
||||
.andExpect(status().isOk())
|
||||
.andExpect(jsonPath("$.count").value(3));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -157,4 +157,30 @@ class AnnotationServiceTest {
|
||||
|
||||
assertThat(annotationService.listAnnotations(docId)).containsExactly(a);
|
||||
}
|
||||
|
||||
// ─── backfillAnnotationFileHashForDocument ────────────────────────────────
|
||||
|
||||
@Test
|
||||
void backfillAnnotationFileHashForDocument_setsHashOnAnnotationsWithNullHash() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
String hash = "abc123";
|
||||
DocumentAnnotation a = DocumentAnnotation.builder()
|
||||
.id(UUID.randomUUID()).documentId(docId).build();
|
||||
when(annotationRepository.findByDocumentIdAndFileHashIsNull(docId)).thenReturn(List.of(a));
|
||||
|
||||
annotationService.backfillAnnotationFileHashForDocument(docId, hash);
|
||||
|
||||
assertThat(a.getFileHash()).isEqualTo(hash);
|
||||
verify(annotationRepository).save(a);
|
||||
}
|
||||
|
||||
@Test
|
||||
void backfillAnnotationFileHashForDocument_doesNothingWhenNoAnnotations() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
when(annotationRepository.findByDocumentIdAndFileHashIsNull(docId)).thenReturn(List.of());
|
||||
|
||||
annotationService.backfillAnnotationFileHashForDocument(docId, "hash");
|
||||
|
||||
verify(annotationRepository, never()).save(any());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@ import java.util.UUID;
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.eq;
|
||||
import static org.mockito.Mockito.*;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
@@ -31,6 +32,7 @@ class DocumentServiceTest {
|
||||
@Mock FileService fileService;
|
||||
@Mock TagService tagService;
|
||||
@Mock DocumentVersionService documentVersionService;
|
||||
@Mock AnnotationService annotationService;
|
||||
@InjectMocks DocumentService documentService;
|
||||
|
||||
// ─── getDocumentById ──────────────────────────────────────────────────────
|
||||
@@ -209,4 +211,59 @@ class DocumentServiceTest {
|
||||
|
||||
verify(documentVersionService).recordVersion(any(Document.class));
|
||||
}
|
||||
|
||||
// ─── backfillFileHashes ───────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void backfillFileHashes_skipsDocumentsWithNoFilePath() throws Exception {
|
||||
Document noFile = Document.builder().id(UUID.randomUUID()).build();
|
||||
when(documentRepository.findByFileHashIsNullAndFilePathIsNotNull()).thenReturn(List.of());
|
||||
|
||||
int count = documentService.backfillFileHashes();
|
||||
|
||||
assertThat(count).isZero();
|
||||
verify(fileService, never()).downloadFileBytes(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void backfillFileHashes_computesHashAndSavesDocument() throws Exception {
|
||||
UUID docId = UUID.randomUUID();
|
||||
Document doc = Document.builder().id(docId).filePath("documents/scan.pdf").build();
|
||||
when(documentRepository.findByFileHashIsNullAndFilePathIsNotNull()).thenReturn(List.of(doc));
|
||||
when(fileService.downloadFileBytes("documents/scan.pdf")).thenReturn(new byte[]{1, 2, 3});
|
||||
when(documentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
documentService.backfillFileHashes();
|
||||
|
||||
assertThat(doc.getFileHash()).isNotNull().hasSize(64);
|
||||
verify(documentRepository).save(doc);
|
||||
}
|
||||
|
||||
@Test
|
||||
void backfillFileHashes_propagatesHashToAnnotations() throws Exception {
|
||||
UUID docId = UUID.randomUUID();
|
||||
Document doc = Document.builder().id(docId).filePath("documents/scan.pdf").build();
|
||||
when(documentRepository.findByFileHashIsNullAndFilePathIsNotNull()).thenReturn(List.of(doc));
|
||||
when(fileService.downloadFileBytes("documents/scan.pdf")).thenReturn(new byte[]{1, 2, 3});
|
||||
when(documentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
documentService.backfillFileHashes();
|
||||
|
||||
verify(annotationService).backfillAnnotationFileHashForDocument(eq(docId), any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void backfillFileHashes_returnsCountOfUpdatedDocuments() throws Exception {
|
||||
UUID id1 = UUID.randomUUID();
|
||||
UUID id2 = UUID.randomUUID();
|
||||
Document doc1 = Document.builder().id(id1).filePath("documents/a.pdf").build();
|
||||
Document doc2 = Document.builder().id(id2).filePath("documents/b.pdf").build();
|
||||
when(documentRepository.findByFileHashIsNullAndFilePathIsNotNull()).thenReturn(List.of(doc1, doc2));
|
||||
when(fileService.downloadFileBytes(any())).thenReturn(new byte[]{1});
|
||||
when(documentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
int count = documentService.backfillFileHashes();
|
||||
|
||||
assertThat(count).isEqualTo(2);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,8 @@ let editingTagName = $state('');
|
||||
let editingGroupId: string | null = $state(null);
|
||||
let backfillResult: number | null = $state(null);
|
||||
let backfillLoading = $state(false);
|
||||
let backfillHashesResult: number | null = $state(null);
|
||||
let backfillHashesLoading = $state(false);
|
||||
|
||||
const availablePermissions = ['WRITE_ALL', 'ADMIN', 'ADMIN_USER', 'ADMIN_TAG', 'ADMIN_PERMISSION'];
|
||||
|
||||
@@ -45,6 +47,20 @@ async function backfillVersions() {
|
||||
backfillLoading = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Triggers the file-hash backfill on the server and stores the returned count.
// The loading flag is set for the duration of the request and the previous
// result is cleared first.
// NOTE(review): a non-OK response leaves the result at null with no user feedback,
// and a rejected fetch propagates to the caller — confirm this is intended.
async function backfillFileHashes() {
	backfillHashesLoading = true;
	backfillHashesResult = null;
	try {
		const response = await fetch('/api/admin/backfill-file-hashes', { method: 'POST' });
		if (!response.ok) {
			return;
		}
		const { count } = await response.json();
		backfillHashesResult = count;
	} finally {
		// Runs on success, early return and thrown errors alike.
		backfillHashesLoading = false;
	}
}
|
||||
</script>
|
||||
|
||||
<div class="mx-auto max-w-7xl py-8 font-sans sm:px-6 lg:px-8">
|
||||
@@ -535,5 +551,24 @@ async function backfillVersions() {
|
||||
</p>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<div class="mt-4 rounded-sm border border-brand-sand bg-white p-6 shadow-sm">
|
||||
<h2 class="mb-1 text-lg font-bold text-gray-700">
|
||||
{m.admin_system_backfill_hashes_heading()}
|
||||
</h2>
|
||||
<p class="mb-4 text-sm text-gray-500">{m.admin_system_backfill_hashes_description()}</p>
|
||||
<button
|
||||
onclick={backfillFileHashes}
|
||||
disabled={backfillHashesLoading}
|
||||
class="rounded bg-brand-navy px-6 py-2 text-sm font-bold text-white uppercase transition hover:bg-brand-mint hover:text-brand-navy disabled:cursor-not-allowed disabled:opacity-50"
|
||||
>
|
||||
{backfillHashesLoading ? '…' : m.admin_system_backfill_hashes_btn()}
|
||||
</button>
|
||||
{#if backfillHashesResult !== null}
|
||||
<p class="mt-4 text-sm font-medium text-brand-navy">
|
||||
{m.admin_system_backfill_hashes_success({ count: backfillHashesResult })}
|
||||
</p>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user