feat(backend): add POST /api/admin/backfill-file-hashes endpoint
- DocumentRepository: findByFileHashIsNullAndFilePathIsNotNull()
- AnnotationRepository: findByDocumentIdAndFileHashIsNull()
- FileService: downloadFileBytes() downloads raw bytes from S3 for hashing
- AnnotationService: backfillAnnotationFileHashForDocument() sets the hash on annotations whose hash is null
- DocumentService: backfillFileHashes() iterates over documents with a null hash, downloads their bytes, computes the SHA-256, saves the document, then propagates the hash to its annotations
- AdminController: POST /api/admin/backfill-file-hashes delegates to DocumentService

Closes #56

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -41,4 +41,10 @@ public class AdminController {
|
||||
documentService.getDocumentsWithoutVersions());
|
||||
return ResponseEntity.ok(new BackfillResult(count));
|
||||
}
|
||||
|
||||
@PostMapping("/backfill-file-hashes")
|
||||
public ResponseEntity<BackfillResult> backfillFileHashes() {
|
||||
int count = documentService.backfillFileHashes();
|
||||
return ResponseEntity.ok(new BackfillResult(count));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,4 +14,6 @@ public interface AnnotationRepository extends JpaRepository<DocumentAnnotation,
|
||||
/** Derived query: annotations matching both the given document id and page number (semantics follow from the method name — confirm against the entity mapping). */
List<DocumentAnnotation> findByDocumentIdAndPageNumber(UUID documentId, int pageNumber);
|
||||
|
||||
/** Derived query: the annotation with the given id, only if it also carries the given document id; empty otherwise (semantics follow from the method name). */
Optional<DocumentAnnotation> findByIdAndDocumentId(UUID id, UUID documentId);
|
||||
|
||||
/** Derived query: annotations of the given document whose fileHash is still null (semantics follow from the method name); used by the file-hash backfill. */
List<DocumentAnnotation> findByDocumentIdAndFileHashIsNull(UUID documentId);
|
||||
}
|
||||
|
||||
@@ -37,6 +37,8 @@ public interface DocumentRepository extends JpaRepository<Document, UUID>, JpaSp
|
||||
/** Returns every Document whose id does not occur as documentId in any DocumentVersion row. */
@Query("SELECT d FROM Document d WHERE d.id NOT IN (SELECT DISTINCT dv.documentId FROM DocumentVersion dv)")
|
||||
List<Document> findDocumentsWithoutVersions();
|
||||
|
||||
/** Derived query: documents that have a file path but no file hash yet (semantics follow from the method name); these are the candidates for the hash backfill. */
List<Document> findByFileHashIsNullAndFilePathIsNotNull();
|
||||
|
||||
@Query("SELECT DISTINCT d FROM Document d " +
|
||||
"JOIN d.receivers r " +
|
||||
"WHERE " +
|
||||
|
||||
@@ -62,6 +62,14 @@ public class AnnotationService {
|
||||
annotationRepository.delete(annotation);
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public void backfillAnnotationFileHashForDocument(UUID documentId, String fileHash) {
|
||||
annotationRepository.findByDocumentIdAndFileHashIsNull(documentId).forEach(a -> {
|
||||
a.setFileHash(fileHash);
|
||||
annotationRepository.save(a);
|
||||
});
|
||||
}
|
||||
|
||||
// ─── private helpers ──────────────────────────────────────────────────────
|
||||
|
||||
private boolean overlaps(DocumentAnnotation existing, CreateAnnotationDTO dto) {
|
||||
|
||||
@@ -18,6 +18,8 @@ import org.springframework.transaction.annotation.Transactional;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.time.LocalDate;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
@@ -38,6 +40,7 @@ public class DocumentService {
|
||||
private final FileService fileService;
|
||||
private final TagService tagService;
|
||||
private final DocumentVersionService documentVersionService;
|
||||
private final AnnotationService annotationService;
|
||||
|
||||
/**
|
||||
* Lädt eine Datei hoch.
|
||||
@@ -282,4 +285,39 @@ public class DocumentService {
|
||||
});
|
||||
tagService.delete(tagId);
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public int backfillFileHashes() {
|
||||
List<Document> docs = documentRepository.findByFileHashIsNullAndFilePathIsNotNull();
|
||||
int count = 0;
|
||||
for (Document doc : docs) {
|
||||
try {
|
||||
byte[] bytes = fileService.downloadFileBytes(doc.getFilePath());
|
||||
String hash = sha256Hex(bytes);
|
||||
doc.setFileHash(hash);
|
||||
documentRepository.save(doc);
|
||||
annotationService.backfillAnnotationFileHashForDocument(doc.getId(), hash);
|
||||
count++;
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to backfill hash for document {}: {}", doc.getId(), e.getMessage());
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
// ─── private helpers ──────────────────────────────────────────────────────
|
||||
|
||||
private static String sha256Hex(byte[] bytes) {
|
||||
try {
|
||||
MessageDigest digest = MessageDigest.getInstance("SHA-256");
|
||||
byte[] hash = digest.digest(bytes);
|
||||
StringBuilder sb = new StringBuilder(64);
|
||||
for (byte b : hash) {
|
||||
sb.append(String.format("%02x", b));
|
||||
}
|
||||
return sb.toString();
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
throw new IllegalStateException("SHA-256 not available", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.core.io.InputStreamResource;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.UUID;
|
||||
@@ -85,6 +86,26 @@ public class FileService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Downloads a file from S3/MinIO and returns its raw bytes.
|
||||
* Used for hash backfill — callers are responsible for not calling this on large files unnecessarily.
|
||||
*/
|
||||
public byte[] downloadFileBytes(String s3Key) throws IOException {
|
||||
try {
|
||||
GetObjectRequest getObjectRequest = GetObjectRequest.builder()
|
||||
.bucket(bucketName)
|
||||
.key(s3Key)
|
||||
.build();
|
||||
try (InputStream in = s3Client.getObject(getObjectRequest)) {
|
||||
return in.readAllBytes();
|
||||
}
|
||||
} catch (NoSuchKeyException e) {
|
||||
throw new StorageFileNotFoundException("File not found in storage: " + s3Key);
|
||||
} catch (S3Exception e) {
|
||||
throw new IOException("Failed to download file from storage: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
// ─── private helpers ──────────────────────────────────────────────────────
|
||||
|
||||
private static String sha256Hex(byte[] bytes) {
|
||||
|
||||
Reference in New Issue
Block a user