feat: backfill file hashes for existing documents (#56) #59

Merged
marcel merged 4 commits from feature/56-backfill-file-hashes into main 2026-03-24 22:09:37 +01:00
17 changed files with 268 additions and 24 deletions

View File

@@ -41,4 +41,10 @@ public class AdminController {
documentService.getDocumentsWithoutVersions());
return ResponseEntity.ok(new BackfillResult(count));
}
@PostMapping("/backfill-file-hashes")
public ResponseEntity<BackfillResult> backfillFileHashes() {
int count = documentService.backfillFileHashes();
return ResponseEntity.ok(new BackfillResult(count));
}
}

View File

@@ -14,4 +14,6 @@ public interface AnnotationRepository extends JpaRepository<DocumentAnnotation,
List<DocumentAnnotation> findByDocumentIdAndPageNumber(UUID documentId, int pageNumber);
Optional<DocumentAnnotation> findByIdAndDocumentId(UUID id, UUID documentId);
List<DocumentAnnotation> findByDocumentIdAndFileHashIsNull(UUID documentId);
}

View File

@@ -37,6 +37,8 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
@Query("SELECT d FROM Document d WHERE d.id NOT IN (SELECT DISTINCT dv.documentId FROM DocumentVersion dv)")
List<Document> findDocumentsWithoutVersions();
List<Document> findByFileHashIsNullAndFilePathIsNotNull();
@Query("SELECT DISTINCT d FROM Document d " +
"JOIN d.receivers r " +
"WHERE " +

View File

@@ -62,6 +62,14 @@ public class AnnotationService {
annotationRepository.delete(annotation);
}
@Transactional
public void backfillAnnotationFileHashForDocument(UUID documentId, String fileHash) {
annotationRepository.findByDocumentIdAndFileHashIsNull(documentId).forEach(a -> {
a.setFileHash(fileHash);
annotationRepository.save(a);
});
}
// ─── private helpers ──────────────────────────────────────────────────────
private boolean overlaps(DocumentAnnotation existing, CreateAnnotationDTO dto) {

View File

@@ -18,6 +18,8 @@ import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.Arrays;
@@ -38,6 +40,7 @@ public class DocumentService {
private final FileService fileService;
private final TagService tagService;
private final DocumentVersionService documentVersionService;
private final AnnotationService annotationService;
/**
* Lädt eine Datei hoch.
@@ -282,4 +285,39 @@ public class DocumentService {
});
tagService.delete(tagId);
}
@Transactional
public int backfillFileHashes() {
List<Document> docs = documentRepository.findByFileHashIsNullAndFilePathIsNotNull();
int count = 0;
for (Document doc : docs) {
try {
byte[] bytes = fileService.downloadFileBytes(doc.getFilePath());
String hash = sha256Hex(bytes);
doc.setFileHash(hash);
documentRepository.save(doc);
annotationService.backfillAnnotationFileHashForDocument(doc.getId(), hash);
count++;
} catch (Exception e) {
log.warn("Failed to backfill hash for document {}: {}", doc.getId(), e.getMessage());
}
}
return count;
}
// ─── private helpers ──────────────────────────────────────────────────────
private static String sha256Hex(byte[] bytes) {
try {
MessageDigest digest = MessageDigest.getInstance("SHA-256");
byte[] hash = digest.digest(bytes);
StringBuilder sb = new StringBuilder(64);
for (byte b : hash) {
sb.append(String.format("%02x", b));
}
return sb.toString();
} catch (NoSuchAlgorithmException e) {
throw new IllegalStateException("SHA-256 not available", e);
}
}
}

View File

@@ -13,6 +13,7 @@ import org.springframework.web.multipart.MultipartFile;
import org.springframework.core.io.InputStreamResource;
import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.UUID;
@@ -85,6 +86,26 @@ public class FileService {
}
}
/**
* Downloads a file from S3/MinIO and returns its raw bytes.
* Used for hash backfill — callers are responsible for not calling this on large files unnecessarily.
*/
public byte[] downloadFileBytes(String s3Key) throws IOException {
try {
GetObjectRequest getObjectRequest = GetObjectRequest.builder()
.bucket(bucketName)
.key(s3Key)
.build();
try (InputStream in = s3Client.getObject(getObjectRequest)) {
return in.readAllBytes();
}
} catch (NoSuchKeyException e) {
throw new StorageFileNotFoundException("File not found in storage: " + s3Key);
} catch (S3Exception e) {
throw new IOException("Failed to download file from storage: " + e.getMessage(), e);
}
}
// ─── private helpers ──────────────────────────────────────────────────────
private static String sha256Hex(byte[] bytes) {

View File

@@ -58,4 +58,29 @@ class AdminControllerTest {
.andExpect(status().isOk())
.andExpect(jsonPath("$.count").value(1));
}
// ─── POST /api/admin/backfill-file-hashes ──────────────────────────────────
@Test
void backfillFileHashes_returns401_whenUnauthenticated() throws Exception {
mockMvc.perform(post("/api/admin/backfill-file-hashes"))
.andExpect(status().isUnauthorized());
}
@Test
@WithMockUser(roles = "USER")
void backfillFileHashes_returns403_whenNotAdmin() throws Exception {
mockMvc.perform(post("/api/admin/backfill-file-hashes"))
.andExpect(status().isForbidden());
}
@Test
@WithMockUser(authorities = "ADMIN")
void backfillFileHashes_returns200_withCount_whenAdmin() throws Exception {
when(documentService.backfillFileHashes()).thenReturn(3);
mockMvc.perform(post("/api/admin/backfill-file-hashes"))
.andExpect(status().isOk())
.andExpect(jsonPath("$.count").value(3));
}
}

View File

@@ -157,4 +157,30 @@ class AnnotationServiceTest {
assertThat(annotationService.listAnnotations(docId)).containsExactly(a);
}
// ─── backfillAnnotationFileHashForDocument ────────────────────────────────
@Test
void backfillAnnotationFileHashForDocument_setsHashOnAnnotationsWithNullHash() {
UUID docId = UUID.randomUUID();
String hash = "abc123";
DocumentAnnotation a = DocumentAnnotation.builder()
.id(UUID.randomUUID()).documentId(docId).build();
when(annotationRepository.findByDocumentIdAndFileHashIsNull(docId)).thenReturn(List.of(a));
annotationService.backfillAnnotationFileHashForDocument(docId, hash);
assertThat(a.getFileHash()).isEqualTo(hash);
verify(annotationRepository).save(a);
}
@Test
void backfillAnnotationFileHashForDocument_doesNothingWhenNoAnnotations() {
UUID docId = UUID.randomUUID();
when(annotationRepository.findByDocumentIdAndFileHashIsNull(docId)).thenReturn(List.of());
annotationService.backfillAnnotationFileHashForDocument(docId, "hash");
verify(annotationRepository, never()).save(any());
}
}

View File

@@ -21,6 +21,7 @@ import java.util.UUID;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.*;
@ExtendWith(MockitoExtension.class)
@@ -31,6 +32,7 @@ class DocumentServiceTest {
@Mock FileService fileService;
@Mock TagService tagService;
@Mock DocumentVersionService documentVersionService;
@Mock AnnotationService annotationService;
@InjectMocks DocumentService documentService;
// ─── getDocumentById ──────────────────────────────────────────────────────
@@ -209,4 +211,59 @@ class DocumentServiceTest {
verify(documentVersionService).recordVersion(any(Document.class));
}
// ─── backfillFileHashes ───────────────────────────────────────────────────
@Test
void backfillFileHashes_skipsDocumentsWithNoFilePath() throws Exception {
Document noFile = Document.builder().id(UUID.randomUUID()).build();
when(documentRepository.findByFileHashIsNullAndFilePathIsNotNull()).thenReturn(List.of());
int count = documentService.backfillFileHashes();
assertThat(count).isZero();
verify(fileService, never()).downloadFileBytes(any());
}
@Test
void backfillFileHashes_computesHashAndSavesDocument() throws Exception {
UUID docId = UUID.randomUUID();
Document doc = Document.builder().id(docId).filePath("documents/scan.pdf").build();
when(documentRepository.findByFileHashIsNullAndFilePathIsNotNull()).thenReturn(List.of(doc));
when(fileService.downloadFileBytes("documents/scan.pdf")).thenReturn(new byte[]{1, 2, 3});
when(documentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
documentService.backfillFileHashes();
assertThat(doc.getFileHash()).isNotNull().hasSize(64);
verify(documentRepository).save(doc);
}
@Test
void backfillFileHashes_propagatesHashToAnnotations() throws Exception {
UUID docId = UUID.randomUUID();
Document doc = Document.builder().id(docId).filePath("documents/scan.pdf").build();
when(documentRepository.findByFileHashIsNullAndFilePathIsNotNull()).thenReturn(List.of(doc));
when(fileService.downloadFileBytes("documents/scan.pdf")).thenReturn(new byte[]{1, 2, 3});
when(documentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
documentService.backfillFileHashes();
verify(annotationService).backfillAnnotationFileHashForDocument(eq(docId), any());
}
@Test
void backfillFileHashes_returnsCountOfUpdatedDocuments() throws Exception {
UUID id1 = UUID.randomUUID();
UUID id2 = UUID.randomUUID();
Document doc1 = Document.builder().id(id1).filePath("documents/a.pdf").build();
Document doc2 = Document.builder().id(id2).filePath("documents/b.pdf").build();
when(documentRepository.findByFileHashIsNullAndFilePathIsNotNull()).thenReturn(List.of(doc1, doc2));
when(fileService.downloadFileBytes(any())).thenReturn(new byte[]{1});
when(documentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
int count = documentService.backfillFileHashes();
assertThat(count).isEqualTo(2);
}
}

View File

@@ -216,3 +216,35 @@ test.describe('Admin — tag management', () => {
await page.screenshot({ path: 'test-results/e2e/admin-tag-restored.png' });
});
});
// ─── System tab — backfill file hashes ────────────────────────────────────────
test.describe('Admin system tab — backfill file hashes', () => {
test('admin triggers file hash backfill and sees success message', async ({ request, page }) => {
test.setTimeout(60_000);
// Create a document via API so there is at least one without a hash
const createRes = await request.post('/api/documents', {
multipart: { title: 'E2E Backfill Hash Test' }
});
if (!createRes.ok()) throw new Error(`Create failed: ${createRes.status()}`);
await page.goto('/admin');
await page.waitForSelector('[data-hydrated]');
// Navigate to System tab
await page.getByRole('button', { name: /system/i }).click();
// Click the backfill hashes button
const btn = page.getByRole('button', { name: /datei-hashes berechnen/i });
await expect(btn).toBeVisible();
await btn.click();
// Success message must appear (count >= 0)
await expect(page.locator('text=/\\d+ Dokumente wurden aktualisiert/i')).toBeVisible({
timeout: 15000
});
await page.screenshot({ path: 'test-results/e2e/admin-backfill-hashes.png' });
});
});

View File

@@ -482,7 +482,7 @@ test.describe('PDF annotations — read-only user', () => {
// Isolated session — does not share the admin storage state
test.use({ storageState: { cookies: [], origins: [] } });
test('read-only user sees a disabled Annotieren button', async ({ page }) => {
test('read-only user does not see the Annotieren button', async ({ page }) => {
test.setTimeout(60_000);
await page.goto('/login');
await page.getByLabel('Benutzername').fill('reader');
@@ -494,12 +494,10 @@ test.describe('PDF annotations — read-only user', () => {
const baseURL = process.env.E2E_BASE_URL ?? 'http://localhost:3000';
await page.goto(`${baseURL}/documents/${sharedAnnotationDocId}`);
await page.waitForSelector('[data-hydrated]');
// Wait for the PDF canvas — once rendered, the controls bar (with disabled button) is shown.
await page.locator('canvas').first().waitFor({ state: 'visible', timeout: 30000 });
const disabledBtn = page.getByRole('button', { name: /annotieren/i });
await expect(disabledBtn).toBeVisible({ timeout: 5000 });
await expect(disabledBtn).toBeDisabled();
// Reader users do not have ANNOTATE_ALL permission — the button must not be shown at all.
const annotateBtn = page.getByRole('button', { name: /annotieren/i });
await expect(annotateBtn).not.toBeVisible({ timeout: 5000 });
await page.screenshot({ path: 'test-results/e2e/annotations-button-reader.png' });
});

View File

@@ -242,6 +242,10 @@
"admin_system_backfill_description": "Erstellt einen initialen Verlaufseintrag für alle Dokumente, die noch keinen Verlauf haben (z.B. importierte Dokumente). Dadurch werden beim nächsten Bearbeiten nur die tatsächlich geänderten Felder hervorgehoben.",
"admin_system_backfill_btn": "Jetzt auffüllen",
"admin_system_backfill_success": "{count} Dokumente wurden aufgefüllt.",
"admin_system_backfill_hashes_heading": "Datei-Hashes berechnen",
"admin_system_backfill_hashes_description": "Berechnet den SHA-256-Hash für alle bereits hochgeladenen Dokumente, die noch keinen Hash haben. Dadurch werden Annotationen korrekt mit ihrer Dateiversion verknüpft und wieder angezeigt.",
"admin_system_backfill_hashes_btn": "Datei-Hashes berechnen",
"admin_system_backfill_hashes_success": "{count} Dokumente wurden aktualisiert.",
"comp_expandable_show_more": "Mehr anzeigen",
"comp_expandable_show_less": "Weniger anzeigen",
"error_comment_not_found": "Der Kommentar wurde nicht gefunden.",

View File

@@ -242,6 +242,10 @@
"admin_system_backfill_description": "Creates an initial history entry for all documents that do not have one yet (e.g. imported documents). This ensures that future edits only highlight actually changed fields.",
"admin_system_backfill_btn": "Backfill now",
"admin_system_backfill_success": "{count} documents were backfilled.",
"admin_system_backfill_hashes_heading": "Compute file hashes",
"admin_system_backfill_hashes_description": "Computes the SHA-256 hash for all previously uploaded documents that do not have one yet. This ensures annotations are correctly linked to their file version and shown again.",
"admin_system_backfill_hashes_btn": "Compute file hashes",
"admin_system_backfill_hashes_success": "{count} documents were updated.",
"comp_expandable_show_more": "Show more",
"comp_expandable_show_less": "Show less",
"error_comment_not_found": "The comment could not be found.",

View File

@@ -242,6 +242,10 @@
"admin_system_backfill_description": "Crea una entrada de historial inicial para todos los documentos que aún no tienen ninguna (p.ej. documentos importados). Así, en la próxima edición solo se resaltarán los campos realmente modificados.",
"admin_system_backfill_btn": "Completar ahora",
"admin_system_backfill_success": "{count} documentos fueron completados.",
"admin_system_backfill_hashes_heading": "Calcular hashes de archivo",
"admin_system_backfill_hashes_description": "Calcula el hash SHA-256 para todos los documentos ya subidos que aún no tienen uno. Así las anotaciones se vinculan correctamente a su versión del archivo y vuelven a mostrarse.",
"admin_system_backfill_hashes_btn": "Calcular hashes de archivo",
"admin_system_backfill_hashes_success": "{count} documentos fueron actualizados.",
"comp_expandable_show_more": "Mostrar más",
"comp_expandable_show_less": "Mostrar menos",
"error_comment_not_found": "El comentario no pudo encontrarse.",

View File

@@ -250,16 +250,6 @@ $effect(() => {
>
{doc.title || doc.originalFilename}
</h3>
<!-- Status Badge -->
<span
class="ml-3 inline-flex items-center rounded-full border px-2.5 py-0.5 text-[10px] font-bold tracking-wide uppercase
{doc.status === 'UPLOADED'
? 'border-brand-mint/50 bg-brand-mint/20 text-brand-navy'
: 'border-yellow-200 bg-yellow-50 text-yellow-700'}"
>
{doc.status}
</span>
</div>
<!-- Metadata Row -->

View File

@@ -11,6 +11,8 @@ let editingTagName = $state('');
let editingGroupId: string | null = $state(null);
let backfillResult: number | null = $state(null);
let backfillLoading = $state(false);
let backfillHashesResult: number | null = $state(null);
let backfillHashesLoading = $state(false);
const availablePermissions = ['WRITE_ALL', 'ADMIN', 'ADMIN_USER', 'ADMIN_TAG', 'ADMIN_PERMISSION'];
@@ -45,6 +47,20 @@ async function backfillVersions() {
backfillLoading = false;
}
}
async function backfillFileHashes() {
backfillHashesLoading = true;
backfillHashesResult = null;
try {
const res = await fetch('/api/admin/backfill-file-hashes', { method: 'POST' });
if (res.ok) {
const data = await res.json();
backfillHashesResult = data.count;
}
} finally {
backfillHashesLoading = false;
}
}
</script>
<div class="mx-auto max-w-7xl py-8 font-sans sm:px-6 lg:px-8">
@@ -535,5 +551,24 @@ async function backfillVersions() {
</p>
{/if}
</div>
<div class="mt-4 rounded-sm border border-brand-sand bg-white p-6 shadow-sm">
<h2 class="mb-1 text-lg font-bold text-gray-700">
{m.admin_system_backfill_hashes_heading()}
</h2>
<p class="mb-4 text-sm text-gray-500">{m.admin_system_backfill_hashes_description()}</p>
<button
onclick={backfillFileHashes}
disabled={backfillHashesLoading}
class="rounded bg-brand-navy px-6 py-2 text-sm font-bold text-white uppercase transition hover:bg-brand-mint hover:text-brand-navy disabled:cursor-not-allowed disabled:opacity-50"
>
{backfillHashesLoading ? '…' : m.admin_system_backfill_hashes_btn()}
</button>
{#if backfillHashesResult !== null}
<p class="mt-4 text-sm font-medium text-brand-navy">
{m.admin_system_backfill_hashes_success({ count: backfillHashesResult })}
</p>
{/if}
</div>
{/if}
</div>

View File

@@ -347,14 +347,6 @@ function versionLabel(v: VersionSummary, index: number): string {
<h1 class="truncate font-serif text-xl text-brand-navy" title={doc.title}>
{doc.title || doc.originalFilename}
</h1>
<span
class="flex-shrink-0 rounded-full px-3 py-1 font-sans text-xs font-bold tracking-wide uppercase
{doc.status === 'UPLOADED'
? 'border border-brand-mint bg-brand-mint/30 text-brand-navy'
: 'border border-yellow-200 bg-yellow-100 text-yellow-800'}"
>
{doc.status}
</span>
</div>
</div>