fix(ocr): use presigned URLs for MinIO access from OCR service
Some checks failed
CI / Unit & Component Tests (push) Failing after 2s
CI / Backend Unit Tests (push) Failing after 0s
CI / Unit & Component Tests (pull_request) Failing after 1s
CI / Backend Unit Tests (pull_request) Failing after 1s

The OCR service was getting 403 Forbidden responses because it tried
to download PDFs from MinIO using plain internal URLs, which carry no
authentication, while the MinIO buckets are private.

- Add an S3Presigner bean to MinioConfig
- Add FileService.generatePresignedUrl(), which generates presigned URLs valid for 15 minutes
- Use presigned URLs in OcrService instead of plain internal URLs
- Remove the unused s3InternalUrl / bucketName @Value fields from OcrService

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-12 22:16:52 +02:00
parent 7a4da7cb98
commit 4500c99e40
6 changed files with 48 additions and 18 deletions

View File

@@ -32,7 +32,7 @@ class FileServiceTest {
@BeforeEach
void setUp() {
s3Client = mock(S3Client.class);
fileService = new FileService(s3Client, "test-bucket");
fileService = new FileService(s3Client, null, "test-bucket");
}
@Test

View File

@@ -34,6 +34,7 @@ class OcrServiceTest {
@Mock AnnotationService annotationService;
@Mock TranscriptionBlockRepository blockRepository;
@Mock OcrJobRepository ocrJobRepository;
@Mock FileService fileService;
@InjectMocks OcrService ocrService;
@@ -80,7 +81,7 @@ class OcrServiceTest {
.scriptType(ScriptType.TYPEWRITER).build();
when(documentService.getDocumentById(docId)).thenReturn(doc);
when(ocrHealthClient.isHealthy()).thenReturn(true);
// ocrService constructs the internal MinIO URL from S3 key
when(fileService.generatePresignedUrl(any())).thenReturn("http://minio/presigned");
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of());
when(ocrJobRepository.save(any())).thenAnswer(inv -> {
OcrJob job = inv.getArgument(0);
@@ -103,7 +104,7 @@ class OcrServiceTest {
.scriptType(ScriptType.UNKNOWN).build();
when(documentService.getDocumentById(docId)).thenReturn(doc);
when(ocrHealthClient.isHealthy()).thenReturn(true);
// ocrService constructs the internal MinIO URL from S3 key
when(fileService.generatePresignedUrl(any())).thenReturn("http://minio/presigned");
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of());
when(ocrJobRepository.save(any())).thenAnswer(inv -> {
OcrJob job = inv.getArgument(0);
@@ -128,7 +129,7 @@ class OcrServiceTest {
when(documentService.getDocumentById(docId)).thenReturn(doc);
when(ocrHealthClient.isHealthy()).thenReturn(true);
// ocrService constructs the internal MinIO URL from S3 key
when(fileService.generatePresignedUrl(any())).thenReturn("http://minio/presigned");
when(transcriptionService.listBlocks(docId)).thenReturn(List.of(existingBlock));
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of(
new OcrBlockResult(0, 0.1, 0.1, 0.8, 0.04, null, "Hello")));
@@ -158,7 +159,7 @@ class OcrServiceTest {
when(documentService.getDocumentById(docId)).thenReturn(doc);
when(ocrHealthClient.isHealthy()).thenReturn(true);
// ocrService constructs the internal MinIO URL from S3 key
when(fileService.generatePresignedUrl(any())).thenReturn("http://minio/presigned");
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of(block1, block2));
when(ocrJobRepository.save(any())).thenAnswer(inv -> {