refactor(ocr): make single-document OCR async, fix circular dependency
The dependency between OcrService and OcrAsyncRunner was circular. This is
fixed by moving all OCR processing logic (processDocument,
clearExistingBlocks, createBlocks) into OcrAsyncRunner. OcrService is now a
thin entry point that validates the document, creates the job, and dispatches to OcrAsyncRunner.
Architecture:
- OcrService: validates document, checks health, creates OcrJob, delegates
- OcrAsyncRunner: @Async processDocument + runSingleDocument + runBatch
- OcrBatchService: creates job + job documents, delegates to OcrAsyncRunner
- No circular dependencies
Single-document OCR is now asynchronous: the request returns the jobId immediately.
The frontend polls GET /api/ocr/jobs/{jobId} every 3 s until the job status is DONE or FAILED.
816 backend tests pass, 687 frontend tests pass.
Refs #226
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,142 @@
|
||||
package org.raddatz.familienarchiv.service;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.ArgumentCaptor;
|
||||
import org.mockito.InjectMocks;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.raddatz.familienarchiv.dto.CreateAnnotationDTO;
|
||||
import org.raddatz.familienarchiv.model.*;
|
||||
import org.raddatz.familienarchiv.repository.OcrJobDocumentRepository;
|
||||
import org.raddatz.familienarchiv.repository.OcrJobRepository;
|
||||
import org.raddatz.familienarchiv.repository.TranscriptionBlockRepository;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.eq;
|
||||
import static org.mockito.Mockito.*;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class OcrAsyncRunnerTest {
|
||||
|
||||
@Mock OcrClient ocrClient;
|
||||
@Mock DocumentService documentService;
|
||||
@Mock TranscriptionService transcriptionService;
|
||||
@Mock AnnotationService annotationService;
|
||||
@Mock TranscriptionBlockRepository blockRepository;
|
||||
@Mock FileService fileService;
|
||||
@Mock OcrJobRepository ocrJobRepository;
|
||||
@Mock OcrJobDocumentRepository ocrJobDocumentRepository;
|
||||
@Mock OcrProgressService ocrProgressService;
|
||||
|
||||
@InjectMocks OcrAsyncRunner ocrAsyncRunner;
|
||||
|
||||
@Test
|
||||
void processDocument_clearsExistingBlocks() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
TranscriptionBlock existing = TranscriptionBlock.builder()
|
||||
.id(UUID.randomUUID()).documentId(docId).build();
|
||||
Document doc = Document.builder().id(docId).filePath("test.pdf")
|
||||
.fileHash("hash").scriptType(ScriptType.TYPEWRITER).build();
|
||||
|
||||
when(transcriptionService.listBlocks(docId)).thenReturn(List.of(existing));
|
||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned");
|
||||
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of());
|
||||
|
||||
ocrAsyncRunner.processDocument(docId, doc, userId);
|
||||
|
||||
verify(transcriptionService).deleteBlock(docId, existing.getId());
|
||||
}
|
||||
|
||||
@Test
|
||||
void processDocument_createsAnnotationAndBlock_forEachResult() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
Document doc = Document.builder().id(docId).filePath("test.pdf")
|
||||
.fileHash("hash").scriptType(ScriptType.TYPEWRITER).build();
|
||||
|
||||
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
|
||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned");
|
||||
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of(
|
||||
new OcrBlockResult(0, 0.1, 0.1, 0.8, 0.04, null, "Line 1"),
|
||||
new OcrBlockResult(0, 0.1, 0.2, 0.8, 0.04, null, "Line 2")));
|
||||
DocumentAnnotation ann = DocumentAnnotation.builder().id(UUID.randomUUID()).build();
|
||||
when(annotationService.createOcrAnnotation(any(), any(), any(), any(), any())).thenReturn(ann);
|
||||
|
||||
ocrAsyncRunner.processDocument(docId, doc, userId);
|
||||
|
||||
verify(annotationService, times(2)).createOcrAnnotation(
|
||||
eq(docId), any(CreateAnnotationDTO.class), eq(userId), eq("hash"), any());
|
||||
verify(blockRepository, times(2)).save(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void processDocument_setsBlockSourceToOcr() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
Document doc = Document.builder().id(docId).filePath("test.pdf")
|
||||
.fileHash("hash").scriptType(ScriptType.TYPEWRITER).build();
|
||||
|
||||
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
|
||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned");
|
||||
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of(
|
||||
new OcrBlockResult(0, 0.1, 0.1, 0.8, 0.04, null, "Test")));
|
||||
DocumentAnnotation ann = DocumentAnnotation.builder().id(UUID.randomUUID()).build();
|
||||
when(annotationService.createOcrAnnotation(any(), any(), any(), any(), any())).thenReturn(ann);
|
||||
|
||||
ocrAsyncRunner.processDocument(docId, doc, userId);
|
||||
|
||||
ArgumentCaptor<TranscriptionBlock> captor = ArgumentCaptor.forClass(TranscriptionBlock.class);
|
||||
verify(blockRepository).save(captor.capture());
|
||||
assertThat(captor.getValue().getSource()).isEqualTo(BlockSource.OCR);
|
||||
}
|
||||
|
||||
@Test
|
||||
void runSingleDocument_setsJobDone_onSuccess() {
|
||||
UUID jobId = UUID.randomUUID();
|
||||
UUID docId = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
OcrJob job = OcrJob.builder().id(jobId).totalDocuments(1).status(OcrJobStatus.PENDING).build();
|
||||
Document doc = Document.builder().id(docId).filePath("test.pdf")
|
||||
.fileHash("hash").scriptType(ScriptType.TYPEWRITER).build();
|
||||
|
||||
when(ocrJobRepository.findById(jobId)).thenReturn(Optional.of(job));
|
||||
when(ocrJobRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
|
||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned");
|
||||
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of());
|
||||
|
||||
ocrAsyncRunner.runSingleDocument(jobId, docId, userId);
|
||||
|
||||
assertThat(job.getStatus()).isEqualTo(OcrJobStatus.DONE);
|
||||
}
|
||||
|
||||
@Test
|
||||
void runSingleDocument_setsJobFailed_onError() {
|
||||
UUID jobId = UUID.randomUUID();
|
||||
UUID docId = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
OcrJob job = OcrJob.builder().id(jobId).totalDocuments(1).status(OcrJobStatus.PENDING).build();
|
||||
Document doc = Document.builder().id(docId).filePath("test.pdf")
|
||||
.fileHash("hash").scriptType(ScriptType.TYPEWRITER).build();
|
||||
|
||||
when(ocrJobRepository.findById(jobId)).thenReturn(Optional.of(job));
|
||||
when(ocrJobRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
|
||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned");
|
||||
when(ocrClient.extractBlocks(any(), any())).thenThrow(new RuntimeException("OCR failed"));
|
||||
|
||||
ocrAsyncRunner.runSingleDocument(jobId, docId, userId);
|
||||
|
||||
assertThat(job.getStatus()).isEqualTo(OcrJobStatus.FAILED);
|
||||
assertThat(job.getErrorCount()).isEqualTo(1);
|
||||
}
|
||||
}
|
||||
@@ -12,24 +12,20 @@ import org.raddatz.familienarchiv.repository.OcrJobDocumentRepository;
|
||||
import org.raddatz.familienarchiv.repository.OcrJobRepository;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.eq;
|
||||
import static org.mockito.Mockito.*;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class OcrBatchServiceTest {
|
||||
|
||||
@Mock OcrService ocrService;
|
||||
@Mock OcrHealthClient ocrHealthClient;
|
||||
@Mock DocumentService documentService;
|
||||
@Mock OcrJobRepository ocrJobRepository;
|
||||
@Mock OcrJobDocumentRepository ocrJobDocumentRepository;
|
||||
@Mock OcrProgressService ocrProgressService;
|
||||
@Mock OcrAsyncRunner ocrAsyncRunner;
|
||||
|
||||
@InjectMocks OcrBatchService ocrBatchService;
|
||||
|
||||
@@ -44,7 +40,7 @@ class OcrBatchServiceTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
void startBatch_createsJobAndReturnsJobId() {
|
||||
void startBatch_createsJobAndDispatchesAsync() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
UUID jobId = UUID.randomUUID();
|
||||
@@ -56,87 +52,29 @@ class OcrBatchServiceTest {
|
||||
return job;
|
||||
});
|
||||
when(ocrJobDocumentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
when(ocrJobRepository.findById(jobId)).thenReturn(Optional.of(
|
||||
OcrJob.builder().id(jobId).totalDocuments(1).status(OcrJobStatus.PENDING).build()));
|
||||
when(ocrJobDocumentRepository.findByJobIdOrderByCreatedAtAsc(jobId)).thenReturn(List.of(
|
||||
OcrJobDocument.builder().jobId(jobId).documentId(docId).status(OcrDocumentStatus.PENDING).build()));
|
||||
|
||||
Document doc = Document.builder().id(docId).status(DocumentStatus.UPLOADED)
|
||||
.filePath("test.pdf").fileHash("hash").scriptType(ScriptType.TYPEWRITER).build();
|
||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||
UUID result = ocrBatchService.startBatch(List.of(docId), userId);
|
||||
|
||||
UUID resultJobId = ocrBatchService.startBatch(List.of(docId), userId);
|
||||
|
||||
assertThat(resultJobId).isEqualTo(jobId);
|
||||
verify(ocrService).processDocument(eq(docId), eq(doc), eq(userId));
|
||||
assertThat(result).isEqualTo(jobId);
|
||||
verify(ocrAsyncRunner).runBatch(jobId, userId);
|
||||
}
|
||||
|
||||
@Test
|
||||
void processBatchAsync_skipsPlaceholderDocuments() {
|
||||
UUID jobId = UUID.randomUUID();
|
||||
UUID uploadedId = UUID.randomUUID();
|
||||
UUID placeholderId = UUID.randomUUID();
|
||||
void startBatch_createsJobDocumentForEachId() {
|
||||
UUID doc1 = UUID.randomUUID();
|
||||
UUID doc2 = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
|
||||
OcrJob job = OcrJob.builder().id(jobId).totalDocuments(2).status(OcrJobStatus.PENDING).build();
|
||||
when(ocrJobRepository.findById(jobId)).thenReturn(Optional.of(job));
|
||||
when(ocrJobRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
when(ocrHealthClient.isHealthy()).thenReturn(true);
|
||||
when(ocrJobRepository.save(any())).thenAnswer(inv -> {
|
||||
OcrJob job = inv.getArgument(0);
|
||||
job.setId(UUID.randomUUID());
|
||||
return job;
|
||||
});
|
||||
when(ocrJobDocumentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
OcrJobDocument uploadedJobDoc = OcrJobDocument.builder()
|
||||
.jobId(jobId).documentId(uploadedId).status(OcrDocumentStatus.PENDING).build();
|
||||
OcrJobDocument placeholderJobDoc = OcrJobDocument.builder()
|
||||
.jobId(jobId).documentId(placeholderId).status(OcrDocumentStatus.PENDING).build();
|
||||
when(ocrJobDocumentRepository.findByJobIdOrderByCreatedAtAsc(jobId))
|
||||
.thenReturn(List.of(uploadedJobDoc, placeholderJobDoc));
|
||||
ocrBatchService.startBatch(List.of(doc1, doc2), userId);
|
||||
|
||||
Document uploaded = Document.builder().id(uploadedId).status(DocumentStatus.UPLOADED)
|
||||
.filePath("test.pdf").fileHash("hash").scriptType(ScriptType.TYPEWRITER).build();
|
||||
Document placeholder = Document.builder().id(placeholderId).status(DocumentStatus.PLACEHOLDER).build();
|
||||
when(documentService.getDocumentById(uploadedId)).thenReturn(uploaded);
|
||||
when(documentService.getDocumentById(placeholderId)).thenReturn(placeholder);
|
||||
|
||||
ocrBatchService.processBatchAsync(jobId, userId);
|
||||
|
||||
verify(ocrService).processDocument(eq(uploadedId), eq(uploaded), eq(userId));
|
||||
verify(ocrService, never()).processDocument(eq(placeholderId), any(), any());
|
||||
assertThat(placeholderJobDoc.getStatus()).isEqualTo(OcrDocumentStatus.SKIPPED);
|
||||
}
|
||||
|
||||
@Test
|
||||
void processBatchAsync_continuesAfterSingleDocumentFailure() {
|
||||
UUID jobId = UUID.randomUUID();
|
||||
UUID failDocId = UUID.randomUUID();
|
||||
UUID successDocId = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
|
||||
OcrJob job = OcrJob.builder().id(jobId).totalDocuments(2).status(OcrJobStatus.PENDING).build();
|
||||
when(ocrJobRepository.findById(jobId)).thenReturn(Optional.of(job));
|
||||
when(ocrJobRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
when(ocrJobDocumentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
|
||||
OcrJobDocument failJobDoc = OcrJobDocument.builder()
|
||||
.jobId(jobId).documentId(failDocId).status(OcrDocumentStatus.PENDING).build();
|
||||
OcrJobDocument successJobDoc = OcrJobDocument.builder()
|
||||
.jobId(jobId).documentId(successDocId).status(OcrDocumentStatus.PENDING).build();
|
||||
when(ocrJobDocumentRepository.findByJobIdOrderByCreatedAtAsc(jobId))
|
||||
.thenReturn(List.of(failJobDoc, successJobDoc));
|
||||
|
||||
Document failDoc = Document.builder().id(failDocId).status(DocumentStatus.UPLOADED)
|
||||
.filePath("fail.pdf").fileHash("hash1").scriptType(ScriptType.TYPEWRITER).build();
|
||||
Document successDoc = Document.builder().id(successDocId).status(DocumentStatus.UPLOADED)
|
||||
.filePath("success.pdf").fileHash("hash2").scriptType(ScriptType.TYPEWRITER).build();
|
||||
when(documentService.getDocumentById(failDocId)).thenReturn(failDoc);
|
||||
when(documentService.getDocumentById(successDocId)).thenReturn(successDoc);
|
||||
|
||||
doThrow(new RuntimeException("OCR failed")).when(ocrService)
|
||||
.processDocument(eq(failDocId), any(), any());
|
||||
|
||||
ocrBatchService.processBatchAsync(jobId, userId);
|
||||
|
||||
verify(ocrService).processDocument(eq(successDocId), eq(successDoc), eq(userId));
|
||||
assertThat(failJobDoc.getStatus()).isEqualTo(OcrDocumentStatus.FAILED);
|
||||
assertThat(successJobDoc.getStatus()).isEqualTo(OcrDocumentStatus.DONE);
|
||||
assertThat(job.getStatus()).isEqualTo(OcrJobStatus.DONE);
|
||||
verify(ocrJobDocumentRepository, times(2)).save(any());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,50 +2,39 @@ package org.raddatz.familienarchiv.service;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.ArgumentCaptor;
|
||||
import org.mockito.InjectMocks;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.mockito.ArgumentCaptor;
|
||||
import org.raddatz.familienarchiv.dto.CreateAnnotationDTO;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.raddatz.familienarchiv.model.*;
|
||||
import org.raddatz.familienarchiv.repository.OcrJobRepository;
|
||||
import org.raddatz.familienarchiv.repository.TranscriptionBlockRepository;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.eq;
|
||||
import static org.mockito.Mockito.*;
|
||||
import static org.springframework.http.HttpStatus.*;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class OcrServiceTest {
|
||||
|
||||
@Mock OcrClient ocrClient;
|
||||
@Mock OcrHealthClient ocrHealthClient;
|
||||
@Mock DocumentService documentService;
|
||||
@Mock TranscriptionService transcriptionService;
|
||||
@Mock AnnotationService annotationService;
|
||||
@Mock TranscriptionBlockRepository blockRepository;
|
||||
@Mock OcrJobRepository ocrJobRepository;
|
||||
@Mock FileService fileService;
|
||||
@Mock OcrAsyncRunner ocrAsyncRunner;
|
||||
|
||||
@InjectMocks OcrService ocrService;
|
||||
|
||||
@Test
|
||||
void startOcr_throwsBadRequest_whenDocumentIsPlaceholder() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
Document doc = Document.builder().id(docId).status(DocumentStatus.PLACEHOLDER).build();
|
||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||
|
||||
assertThatThrownBy(() -> ocrService.startOcr(docId, null, userId))
|
||||
assertThatThrownBy(() -> ocrService.startOcr(docId, null, UUID.randomUUID()))
|
||||
.isInstanceOf(DomainException.class)
|
||||
.satisfies(e -> {
|
||||
DomainException de = (DomainException) e;
|
||||
@@ -57,150 +46,53 @@ class OcrServiceTest {
|
||||
@Test
|
||||
void startOcr_throwsServiceUnavailable_whenOcrServiceIsDown() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
Document doc = Document.builder().id(docId).status(DocumentStatus.UPLOADED)
|
||||
.filePath("documents/test.pdf").fileHash("hash123").build();
|
||||
.filePath("test.pdf").build();
|
||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||
when(ocrHealthClient.isHealthy()).thenReturn(false);
|
||||
|
||||
assertThatThrownBy(() -> ocrService.startOcr(docId, null, userId))
|
||||
assertThatThrownBy(() -> ocrService.startOcr(docId, null, UUID.randomUUID()))
|
||||
.isInstanceOf(DomainException.class)
|
||||
.satisfies(e -> {
|
||||
DomainException de = (DomainException) e;
|
||||
assertThat(de.getCode()).isEqualTo(ErrorCode.OCR_SERVICE_UNAVAILABLE);
|
||||
});
|
||||
.satisfies(e -> assertThat(((DomainException) e).getCode())
|
||||
.isEqualTo(ErrorCode.OCR_SERVICE_UNAVAILABLE));
|
||||
}
|
||||
|
||||
@Test
|
||||
void startOcr_createsJobAndReturnsJobId() {
|
||||
void startOcr_createsJobAndDispatchesAsync() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
UUID jobId = UUID.randomUUID();
|
||||
Document doc = Document.builder().id(docId).status(DocumentStatus.UPLOADED)
|
||||
.filePath("documents/test.pdf").fileHash("hash123")
|
||||
.scriptType(ScriptType.TYPEWRITER).build();
|
||||
.filePath("test.pdf").scriptType(ScriptType.TYPEWRITER).build();
|
||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||
when(ocrHealthClient.isHealthy()).thenReturn(true);
|
||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://minio/presigned");
|
||||
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of());
|
||||
when(ocrJobRepository.save(any())).thenAnswer(inv -> {
|
||||
OcrJob job = inv.getArgument(0);
|
||||
job.setId(jobId);
|
||||
return job;
|
||||
});
|
||||
|
||||
UUID resultJobId = ocrService.startOcr(docId, ScriptType.TYPEWRITER, userId);
|
||||
UUID result = ocrService.startOcr(docId, null, userId);
|
||||
|
||||
assertThat(resultJobId).isEqualTo(jobId);
|
||||
verify(ocrJobRepository, atLeastOnce()).save(any());
|
||||
assertThat(result).isEqualTo(jobId);
|
||||
verify(ocrAsyncRunner).runSingleDocument(jobId, docId, userId);
|
||||
}
|
||||
|
||||
@Test
|
||||
void startOcr_setsScriptTypeOnDocument_whenProvided() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
Document doc = Document.builder().id(docId).status(DocumentStatus.UPLOADED)
|
||||
.filePath("documents/test.pdf").fileHash("hash123")
|
||||
.scriptType(ScriptType.UNKNOWN).build();
|
||||
.filePath("test.pdf").scriptType(ScriptType.UNKNOWN).build();
|
||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||
when(ocrHealthClient.isHealthy()).thenReturn(true);
|
||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://minio/presigned");
|
||||
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of());
|
||||
when(ocrJobRepository.save(any())).thenAnswer(inv -> {
|
||||
OcrJob job = inv.getArgument(0);
|
||||
job.setId(UUID.randomUUID());
|
||||
return job;
|
||||
});
|
||||
|
||||
ocrService.startOcr(docId, ScriptType.HANDWRITING_LATIN, userId);
|
||||
ocrService.startOcr(docId, ScriptType.HANDWRITING_LATIN, UUID.randomUUID());
|
||||
|
||||
assertThat(doc.getScriptType()).isEqualTo(ScriptType.HANDWRITING_LATIN);
|
||||
}
|
||||
|
||||
@Test
|
||||
void startOcr_clearsExistingBlocks_beforeCreatingNew() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
Document doc = Document.builder().id(docId).status(DocumentStatus.UPLOADED)
|
||||
.filePath("documents/test.pdf").fileHash("hash123")
|
||||
.scriptType(ScriptType.TYPEWRITER).build();
|
||||
TranscriptionBlock existingBlock = TranscriptionBlock.builder()
|
||||
.id(UUID.randomUUID()).documentId(docId).build();
|
||||
|
||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||
when(ocrHealthClient.isHealthy()).thenReturn(true);
|
||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://minio/presigned");
|
||||
when(transcriptionService.listBlocks(docId)).thenReturn(List.of(existingBlock));
|
||||
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of(
|
||||
new OcrBlockResult(0, 0.1, 0.1, 0.8, 0.04, null, "Hello")));
|
||||
when(ocrJobRepository.save(any())).thenAnswer(inv -> {
|
||||
OcrJob job = inv.getArgument(0);
|
||||
job.setId(UUID.randomUUID());
|
||||
return job;
|
||||
});
|
||||
DocumentAnnotation ann = DocumentAnnotation.builder().id(UUID.randomUUID()).build();
|
||||
when(annotationService.createOcrAnnotation(any(), any(), any(), any(), any())).thenReturn(ann);
|
||||
|
||||
ocrService.startOcr(docId, null, userId);
|
||||
|
||||
verify(transcriptionService).deleteBlock(docId, existingBlock.getId());
|
||||
}
|
||||
|
||||
@Test
|
||||
void startOcr_createsAnnotationAndBlock_forEachOcrResult() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
Document doc = Document.builder().id(docId).status(DocumentStatus.UPLOADED)
|
||||
.filePath("documents/test.pdf").fileHash("hash123")
|
||||
.scriptType(ScriptType.TYPEWRITER).build();
|
||||
|
||||
OcrBlockResult block1 = new OcrBlockResult(0, 0.1, 0.1, 0.8, 0.04, null, "Line 1");
|
||||
OcrBlockResult block2 = new OcrBlockResult(0, 0.1, 0.2, 0.8, 0.04, null, "Line 2");
|
||||
|
||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||
when(ocrHealthClient.isHealthy()).thenReturn(true);
|
||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://minio/presigned");
|
||||
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
|
||||
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of(block1, block2));
|
||||
when(ocrJobRepository.save(any())).thenAnswer(inv -> {
|
||||
OcrJob job = inv.getArgument(0);
|
||||
job.setId(UUID.randomUUID());
|
||||
return job;
|
||||
});
|
||||
DocumentAnnotation ann = DocumentAnnotation.builder().id(UUID.randomUUID()).build();
|
||||
when(annotationService.createOcrAnnotation(any(), any(), any(), any(), any())).thenReturn(ann);
|
||||
|
||||
ocrService.startOcr(docId, null, userId);
|
||||
|
||||
verify(annotationService, times(2)).createOcrAnnotation(
|
||||
eq(docId), any(CreateAnnotationDTO.class), eq(userId), eq("hash123"), any());
|
||||
}
|
||||
|
||||
@Test
|
||||
void startOcr_setsBlockSourceToOcr() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
Document doc = Document.builder().id(docId).status(DocumentStatus.UPLOADED)
|
||||
.filePath("documents/test.pdf").fileHash("hash123")
|
||||
.scriptType(ScriptType.TYPEWRITER).build();
|
||||
OcrBlockResult block = new OcrBlockResult(0, 0.1, 0.1, 0.8, 0.04, null, "Test");
|
||||
|
||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||
when(ocrHealthClient.isHealthy()).thenReturn(true);
|
||||
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
|
||||
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of(block));
|
||||
when(ocrJobRepository.save(any())).thenAnswer(inv -> {
|
||||
OcrJob job = inv.getArgument(0);
|
||||
job.setId(UUID.randomUUID());
|
||||
return job;
|
||||
});
|
||||
DocumentAnnotation ann = DocumentAnnotation.builder().id(UUID.randomUUID()).build();
|
||||
when(annotationService.createOcrAnnotation(any(), any(), any(), any(), any())).thenReturn(ann);
|
||||
|
||||
ocrService.startOcr(docId, null, userId);
|
||||
|
||||
ArgumentCaptor<TranscriptionBlock> captor = ArgumentCaptor.forClass(TranscriptionBlock.class);
|
||||
verify(blockRepository).save(captor.capture());
|
||||
assertThat(captor.getValue().getSource()).isEqualTo(BlockSource.OCR);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user