refactor(ocr): make single-document OCR async, fix circular dependency
OcrService → OcrAsyncRunner was circular. Fixed by moving all OCR
processing logic (processDocument, clearExistingBlocks, createBlocks)
into OcrAsyncRunner. OcrService is now a thin entry point that
validates, creates the job, and dispatches to OcrAsyncRunner.
Architecture:
- OcrService: validates document, checks health, creates OcrJob, delegates
- OcrAsyncRunner: @Async processDocument + runSingleDocument + runBatch
- OcrBatchService: creates job + job documents, delegates to OcrAsyncRunner
- No circular dependencies
Single-document OCR is now async (returns jobId immediately).
Frontend polls GET /api/ocr/jobs/{jobId} every 3s until DONE/FAILED.
816 backend tests pass, 687 frontend tests pass.
Refs #226
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,156 @@
|
|||||||
|
package org.raddatz.familienarchiv.service;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.raddatz.familienarchiv.dto.CreateAnnotationDTO;
|
||||||
|
import org.raddatz.familienarchiv.model.*;
|
||||||
|
import org.raddatz.familienarchiv.repository.OcrJobDocumentRepository;
|
||||||
|
import org.raddatz.familienarchiv.repository.OcrJobRepository;
|
||||||
|
import org.raddatz.familienarchiv.repository.TranscriptionBlockRepository;
|
||||||
|
import org.springframework.scheduling.annotation.Async;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Slf4j
|
||||||
|
public class OcrAsyncRunner {
|
||||||
|
|
||||||
|
private static final String OCR_ANNOTATION_COLOR = "#00C7B1";
|
||||||
|
|
||||||
|
private final OcrClient ocrClient;
|
||||||
|
private final DocumentService documentService;
|
||||||
|
private final TranscriptionService transcriptionService;
|
||||||
|
private final AnnotationService annotationService;
|
||||||
|
private final TranscriptionBlockRepository blockRepository;
|
||||||
|
private final FileService fileService;
|
||||||
|
private final OcrJobRepository ocrJobRepository;
|
||||||
|
private final OcrJobDocumentRepository ocrJobDocumentRepository;
|
||||||
|
private final OcrProgressService ocrProgressService;
|
||||||
|
|
||||||
|
@Async
|
||||||
|
public void runSingleDocument(UUID jobId, UUID documentId, UUID userId) {
|
||||||
|
OcrJob job = ocrJobRepository.findById(jobId).orElse(null);
|
||||||
|
if (job == null) return;
|
||||||
|
|
||||||
|
job.setStatus(OcrJobStatus.RUNNING);
|
||||||
|
ocrJobRepository.save(job);
|
||||||
|
|
||||||
|
Document doc = documentService.getDocumentById(documentId);
|
||||||
|
|
||||||
|
try {
|
||||||
|
processDocument(documentId, doc, userId);
|
||||||
|
job.setStatus(OcrJobStatus.DONE);
|
||||||
|
job.setProcessedDocuments(1);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("OCR processing failed for document {}", documentId, e);
|
||||||
|
job.setStatus(OcrJobStatus.FAILED);
|
||||||
|
job.setErrorCount(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
ocrJobRepository.save(job);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Async
|
||||||
|
public void runBatch(UUID jobId, UUID userId) {
|
||||||
|
OcrJob job = ocrJobRepository.findById(jobId).orElse(null);
|
||||||
|
if (job == null) return;
|
||||||
|
|
||||||
|
job.setStatus(OcrJobStatus.RUNNING);
|
||||||
|
ocrJobRepository.save(job);
|
||||||
|
|
||||||
|
List<OcrJobDocument> jobDocs = ocrJobDocumentRepository.findByJobIdOrderByCreatedAtAsc(jobId);
|
||||||
|
|
||||||
|
for (OcrJobDocument jobDoc : jobDocs) {
|
||||||
|
Document doc = documentService.getDocumentById(jobDoc.getDocumentId());
|
||||||
|
|
||||||
|
if (doc.getStatus() == DocumentStatus.PLACEHOLDER) {
|
||||||
|
jobDoc.setStatus(OcrDocumentStatus.SKIPPED);
|
||||||
|
ocrJobDocumentRepository.save(jobDoc);
|
||||||
|
job.setSkippedCount(job.getSkippedCount() + 1);
|
||||||
|
ocrJobRepository.save(job);
|
||||||
|
ocrProgressService.emit(jobId, "document", Map.of(
|
||||||
|
"documentId", jobDoc.getDocumentId(),
|
||||||
|
"status", "SKIPPED",
|
||||||
|
"processed", job.getProcessedDocuments(),
|
||||||
|
"total", job.getTotalDocuments()));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
jobDoc.setStatus(OcrDocumentStatus.RUNNING);
|
||||||
|
ocrJobDocumentRepository.save(jobDoc);
|
||||||
|
|
||||||
|
try {
|
||||||
|
processDocument(jobDoc.getDocumentId(), doc, userId);
|
||||||
|
jobDoc.setStatus(OcrDocumentStatus.DONE);
|
||||||
|
job.setProcessedDocuments(job.getProcessedDocuments() + 1);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("OCR batch: failed document {}", jobDoc.getDocumentId(), e);
|
||||||
|
jobDoc.setStatus(OcrDocumentStatus.FAILED);
|
||||||
|
jobDoc.setErrorMessage(e.getMessage());
|
||||||
|
job.setErrorCount(job.getErrorCount() + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
ocrJobDocumentRepository.save(jobDoc);
|
||||||
|
ocrJobRepository.save(job);
|
||||||
|
|
||||||
|
ocrProgressService.emit(jobId, "document", Map.of(
|
||||||
|
"documentId", jobDoc.getDocumentId(),
|
||||||
|
"status", jobDoc.getStatus().name(),
|
||||||
|
"processed", job.getProcessedDocuments(),
|
||||||
|
"total", job.getTotalDocuments()));
|
||||||
|
}
|
||||||
|
|
||||||
|
job.setStatus(OcrJobStatus.DONE);
|
||||||
|
ocrJobRepository.save(job);
|
||||||
|
|
||||||
|
ocrProgressService.emit(jobId, "done", Map.of(
|
||||||
|
"processed", job.getProcessedDocuments(),
|
||||||
|
"errors", job.getErrorCount(),
|
||||||
|
"skipped", job.getSkippedCount()));
|
||||||
|
ocrProgressService.complete(jobId);
|
||||||
|
}
|
||||||
|
|
||||||
|
void processDocument(UUID documentId, Document doc, UUID userId) {
|
||||||
|
clearExistingBlocks(documentId);
|
||||||
|
|
||||||
|
String pdfUrl = fileService.generatePresignedUrl(doc.getFilePath());
|
||||||
|
List<OcrBlockResult> blocks = ocrClient.extractBlocks(pdfUrl, doc.getScriptType());
|
||||||
|
createTranscriptionBlocks(documentId, blocks, userId, doc.getFileHash());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void clearExistingBlocks(UUID documentId) {
|
||||||
|
List<TranscriptionBlock> existing = transcriptionService.listBlocks(documentId);
|
||||||
|
for (TranscriptionBlock block : existing) {
|
||||||
|
transcriptionService.deleteBlock(documentId, block.getId());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void createTranscriptionBlocks(UUID documentId, List<OcrBlockResult> blocks,
|
||||||
|
UUID userId, String fileHash) {
|
||||||
|
for (int i = 0; i < blocks.size(); i++) {
|
||||||
|
OcrBlockResult block = blocks.get(i);
|
||||||
|
|
||||||
|
CreateAnnotationDTO annotationDTO = new CreateAnnotationDTO(
|
||||||
|
block.pageNumber(), block.x(), block.y(),
|
||||||
|
block.width(), block.height(), OCR_ANNOTATION_COLOR);
|
||||||
|
|
||||||
|
DocumentAnnotation annotation = annotationService.createOcrAnnotation(
|
||||||
|
documentId, annotationDTO, userId, fileHash, block.polygon());
|
||||||
|
|
||||||
|
TranscriptionBlock transcriptionBlock = TranscriptionBlock.builder()
|
||||||
|
.annotationId(annotation.getId())
|
||||||
|
.documentId(documentId)
|
||||||
|
.text(block.text() != null ? block.text() : "")
|
||||||
|
.sortOrder(i)
|
||||||
|
.source(BlockSource.OCR)
|
||||||
|
.createdBy(userId)
|
||||||
|
.updatedBy(userId)
|
||||||
|
.build();
|
||||||
|
blockRepository.save(transcriptionBlock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -7,11 +7,9 @@ import org.raddatz.familienarchiv.exception.ErrorCode;
|
|||||||
import org.raddatz.familienarchiv.model.*;
|
import org.raddatz.familienarchiv.model.*;
|
||||||
import org.raddatz.familienarchiv.repository.OcrJobDocumentRepository;
|
import org.raddatz.familienarchiv.repository.OcrJobDocumentRepository;
|
||||||
import org.raddatz.familienarchiv.repository.OcrJobRepository;
|
import org.raddatz.familienarchiv.repository.OcrJobRepository;
|
||||||
import org.springframework.scheduling.annotation.Async;
|
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
@@ -19,12 +17,10 @@ import java.util.UUID;
|
|||||||
@Slf4j
|
@Slf4j
|
||||||
public class OcrBatchService {
|
public class OcrBatchService {
|
||||||
|
|
||||||
private final OcrService ocrService;
|
|
||||||
private final OcrHealthClient ocrHealthClient;
|
private final OcrHealthClient ocrHealthClient;
|
||||||
private final DocumentService documentService;
|
|
||||||
private final OcrJobRepository ocrJobRepository;
|
private final OcrJobRepository ocrJobRepository;
|
||||||
private final OcrJobDocumentRepository ocrJobDocumentRepository;
|
private final OcrJobDocumentRepository ocrJobDocumentRepository;
|
||||||
private final OcrProgressService ocrProgressService;
|
private final OcrAsyncRunner ocrAsyncRunner;
|
||||||
|
|
||||||
public UUID startBatch(List<UUID> documentIds, UUID userId) {
|
public UUID startBatch(List<UUID> documentIds, UUID userId) {
|
||||||
if (!ocrHealthClient.isHealthy()) {
|
if (!ocrHealthClient.isHealthy()) {
|
||||||
@@ -48,67 +44,7 @@ public class OcrBatchService {
|
|||||||
ocrJobDocumentRepository.save(jobDoc);
|
ocrJobDocumentRepository.save(jobDoc);
|
||||||
}
|
}
|
||||||
|
|
||||||
processBatchAsync(job.getId(), userId);
|
ocrAsyncRunner.runBatch(job.getId(), userId);
|
||||||
return job.getId();
|
return job.getId();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Async
|
|
||||||
void processBatchAsync(UUID jobId, UUID userId) {
|
|
||||||
OcrJob job = ocrJobRepository.findById(jobId).orElse(null);
|
|
||||||
if (job == null) return;
|
|
||||||
|
|
||||||
job.setStatus(OcrJobStatus.RUNNING);
|
|
||||||
ocrJobRepository.save(job);
|
|
||||||
|
|
||||||
List<OcrJobDocument> jobDocs = ocrJobDocumentRepository.findByJobIdOrderByCreatedAtAsc(jobId);
|
|
||||||
|
|
||||||
for (OcrJobDocument jobDoc : jobDocs) {
|
|
||||||
Document doc = documentService.getDocumentById(jobDoc.getDocumentId());
|
|
||||||
|
|
||||||
if (doc.getStatus() == DocumentStatus.PLACEHOLDER) {
|
|
||||||
jobDoc.setStatus(OcrDocumentStatus.SKIPPED);
|
|
||||||
ocrJobDocumentRepository.save(jobDoc);
|
|
||||||
job.setSkippedCount(job.getSkippedCount() + 1);
|
|
||||||
ocrJobRepository.save(job);
|
|
||||||
ocrProgressService.emit(jobId, "document", Map.of(
|
|
||||||
"documentId", jobDoc.getDocumentId(),
|
|
||||||
"status", "SKIPPED",
|
|
||||||
"processed", job.getProcessedDocuments(),
|
|
||||||
"total", job.getTotalDocuments()));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
jobDoc.setStatus(OcrDocumentStatus.RUNNING);
|
|
||||||
ocrJobDocumentRepository.save(jobDoc);
|
|
||||||
|
|
||||||
try {
|
|
||||||
ocrService.processDocument(jobDoc.getDocumentId(), doc, userId);
|
|
||||||
jobDoc.setStatus(OcrDocumentStatus.DONE);
|
|
||||||
job.setProcessedDocuments(job.getProcessedDocuments() + 1);
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.error("OCR batch: failed document {}", jobDoc.getDocumentId(), e);
|
|
||||||
jobDoc.setStatus(OcrDocumentStatus.FAILED);
|
|
||||||
jobDoc.setErrorMessage(e.getMessage());
|
|
||||||
job.setErrorCount(job.getErrorCount() + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
ocrJobDocumentRepository.save(jobDoc);
|
|
||||||
ocrJobRepository.save(job);
|
|
||||||
|
|
||||||
ocrProgressService.emit(jobId, "document", Map.of(
|
|
||||||
"documentId", jobDoc.getDocumentId(),
|
|
||||||
"status", jobDoc.getStatus().name(),
|
|
||||||
"processed", job.getProcessedDocuments(),
|
|
||||||
"total", job.getTotalDocuments()));
|
|
||||||
}
|
|
||||||
|
|
||||||
job.setStatus(OcrJobStatus.DONE);
|
|
||||||
ocrJobRepository.save(job);
|
|
||||||
|
|
||||||
ocrProgressService.emit(jobId, "done", Map.of(
|
|
||||||
"processed", job.getProcessedDocuments(),
|
|
||||||
"errors", job.getErrorCount(),
|
|
||||||
"skipped", job.getSkippedCount()));
|
|
||||||
ocrProgressService.complete(jobId);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,16 +2,12 @@ package org.raddatz.familienarchiv.service;
|
|||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.raddatz.familienarchiv.dto.CreateAnnotationDTO;
|
|
||||||
import org.raddatz.familienarchiv.exception.DomainException;
|
import org.raddatz.familienarchiv.exception.DomainException;
|
||||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||||
import org.raddatz.familienarchiv.model.*;
|
import org.raddatz.familienarchiv.model.*;
|
||||||
import org.raddatz.familienarchiv.repository.OcrJobRepository;
|
import org.raddatz.familienarchiv.repository.OcrJobRepository;
|
||||||
import org.raddatz.familienarchiv.repository.TranscriptionBlockRepository;
|
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import org.springframework.transaction.annotation.Transactional;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
@@ -19,18 +15,11 @@ import java.util.UUID;
|
|||||||
@Slf4j
|
@Slf4j
|
||||||
public class OcrService {
|
public class OcrService {
|
||||||
|
|
||||||
private static final String OCR_ANNOTATION_COLOR = "#00C7B1";
|
|
||||||
|
|
||||||
private final OcrClient ocrClient;
|
|
||||||
private final OcrHealthClient ocrHealthClient;
|
private final OcrHealthClient ocrHealthClient;
|
||||||
private final DocumentService documentService;
|
private final DocumentService documentService;
|
||||||
private final TranscriptionService transcriptionService;
|
|
||||||
private final AnnotationService annotationService;
|
|
||||||
private final TranscriptionBlockRepository blockRepository;
|
|
||||||
private final OcrJobRepository ocrJobRepository;
|
private final OcrJobRepository ocrJobRepository;
|
||||||
private final FileService fileService;
|
private final OcrAsyncRunner ocrAsyncRunner;
|
||||||
|
|
||||||
@Transactional
|
|
||||||
public UUID startOcr(UUID documentId, ScriptType scriptTypeOverride, UUID userId) {
|
public UUID startOcr(UUID documentId, ScriptType scriptTypeOverride, UUID userId) {
|
||||||
Document doc = documentService.getDocumentById(documentId);
|
Document doc = documentService.getDocumentById(documentId);
|
||||||
|
|
||||||
@@ -51,62 +40,11 @@ public class OcrService {
|
|||||||
OcrJob job = OcrJob.builder()
|
OcrJob job = OcrJob.builder()
|
||||||
.totalDocuments(1)
|
.totalDocuments(1)
|
||||||
.createdBy(userId)
|
.createdBy(userId)
|
||||||
.status(OcrJobStatus.RUNNING)
|
.status(OcrJobStatus.PENDING)
|
||||||
.build();
|
.build();
|
||||||
job = ocrJobRepository.save(job);
|
job = ocrJobRepository.save(job);
|
||||||
|
|
||||||
try {
|
ocrAsyncRunner.runSingleDocument(job.getId(), documentId, userId);
|
||||||
processDocument(documentId, doc, userId);
|
|
||||||
job.setStatus(OcrJobStatus.DONE);
|
|
||||||
job.setProcessedDocuments(1);
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.error("OCR processing failed for document {}", documentId, e);
|
|
||||||
job.setStatus(OcrJobStatus.FAILED);
|
|
||||||
job.setErrorCount(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
ocrJobRepository.save(job);
|
|
||||||
return job.getId();
|
return job.getId();
|
||||||
}
|
}
|
||||||
|
|
||||||
void processDocument(UUID documentId, Document doc, UUID userId) {
|
|
||||||
clearExistingBlocks(documentId);
|
|
||||||
|
|
||||||
String pdfUrl = fileService.generatePresignedUrl(doc.getFilePath());
|
|
||||||
List<OcrBlockResult> blocks = ocrClient.extractBlocks(pdfUrl, doc.getScriptType());
|
|
||||||
createTranscriptionBlocks(documentId, blocks, userId, doc.getFileHash());
|
|
||||||
}
|
|
||||||
|
|
||||||
private void clearExistingBlocks(UUID documentId) {
|
|
||||||
List<TranscriptionBlock> existing = transcriptionService.listBlocks(documentId);
|
|
||||||
for (TranscriptionBlock block : existing) {
|
|
||||||
transcriptionService.deleteBlock(documentId, block.getId());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void createTranscriptionBlocks(UUID documentId, List<OcrBlockResult> blocks,
|
|
||||||
UUID userId, String fileHash) {
|
|
||||||
for (int i = 0; i < blocks.size(); i++) {
|
|
||||||
OcrBlockResult block = blocks.get(i);
|
|
||||||
|
|
||||||
CreateAnnotationDTO annotationDTO = new CreateAnnotationDTO(
|
|
||||||
block.pageNumber(), block.x(), block.y(),
|
|
||||||
block.width(), block.height(), OCR_ANNOTATION_COLOR);
|
|
||||||
|
|
||||||
DocumentAnnotation annotation = annotationService.createOcrAnnotation(
|
|
||||||
documentId, annotationDTO, userId, fileHash, block.polygon());
|
|
||||||
|
|
||||||
TranscriptionBlock transcriptionBlock = TranscriptionBlock.builder()
|
|
||||||
.annotationId(annotation.getId())
|
|
||||||
.documentId(documentId)
|
|
||||||
.text(block.text() != null ? block.text() : "")
|
|
||||||
.sortOrder(i)
|
|
||||||
.source(BlockSource.OCR)
|
|
||||||
.createdBy(userId)
|
|
||||||
.updatedBy(userId)
|
|
||||||
.build();
|
|
||||||
blockRepository.save(transcriptionBlock);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,142 @@
|
|||||||
|
package org.raddatz.familienarchiv.service;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
|
import org.mockito.ArgumentCaptor;
|
||||||
|
import org.mockito.InjectMocks;
|
||||||
|
import org.mockito.Mock;
|
||||||
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
import org.raddatz.familienarchiv.dto.CreateAnnotationDTO;
|
||||||
|
import org.raddatz.familienarchiv.model.*;
|
||||||
|
import org.raddatz.familienarchiv.repository.OcrJobDocumentRepository;
|
||||||
|
import org.raddatz.familienarchiv.repository.OcrJobRepository;
|
||||||
|
import org.raddatz.familienarchiv.repository.TranscriptionBlockRepository;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
import static org.mockito.ArgumentMatchers.any;
|
||||||
|
import static org.mockito.ArgumentMatchers.eq;
|
||||||
|
import static org.mockito.Mockito.*;
|
||||||
|
|
||||||
|
@ExtendWith(MockitoExtension.class)
|
||||||
|
class OcrAsyncRunnerTest {
|
||||||
|
|
||||||
|
@Mock OcrClient ocrClient;
|
||||||
|
@Mock DocumentService documentService;
|
||||||
|
@Mock TranscriptionService transcriptionService;
|
||||||
|
@Mock AnnotationService annotationService;
|
||||||
|
@Mock TranscriptionBlockRepository blockRepository;
|
||||||
|
@Mock FileService fileService;
|
||||||
|
@Mock OcrJobRepository ocrJobRepository;
|
||||||
|
@Mock OcrJobDocumentRepository ocrJobDocumentRepository;
|
||||||
|
@Mock OcrProgressService ocrProgressService;
|
||||||
|
|
||||||
|
@InjectMocks OcrAsyncRunner ocrAsyncRunner;
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void processDocument_clearsExistingBlocks() {
|
||||||
|
UUID docId = UUID.randomUUID();
|
||||||
|
UUID userId = UUID.randomUUID();
|
||||||
|
TranscriptionBlock existing = TranscriptionBlock.builder()
|
||||||
|
.id(UUID.randomUUID()).documentId(docId).build();
|
||||||
|
Document doc = Document.builder().id(docId).filePath("test.pdf")
|
||||||
|
.fileHash("hash").scriptType(ScriptType.TYPEWRITER).build();
|
||||||
|
|
||||||
|
when(transcriptionService.listBlocks(docId)).thenReturn(List.of(existing));
|
||||||
|
when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned");
|
||||||
|
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of());
|
||||||
|
|
||||||
|
ocrAsyncRunner.processDocument(docId, doc, userId);
|
||||||
|
|
||||||
|
verify(transcriptionService).deleteBlock(docId, existing.getId());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void processDocument_createsAnnotationAndBlock_forEachResult() {
|
||||||
|
UUID docId = UUID.randomUUID();
|
||||||
|
UUID userId = UUID.randomUUID();
|
||||||
|
Document doc = Document.builder().id(docId).filePath("test.pdf")
|
||||||
|
.fileHash("hash").scriptType(ScriptType.TYPEWRITER).build();
|
||||||
|
|
||||||
|
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
|
||||||
|
when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned");
|
||||||
|
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of(
|
||||||
|
new OcrBlockResult(0, 0.1, 0.1, 0.8, 0.04, null, "Line 1"),
|
||||||
|
new OcrBlockResult(0, 0.1, 0.2, 0.8, 0.04, null, "Line 2")));
|
||||||
|
DocumentAnnotation ann = DocumentAnnotation.builder().id(UUID.randomUUID()).build();
|
||||||
|
when(annotationService.createOcrAnnotation(any(), any(), any(), any(), any())).thenReturn(ann);
|
||||||
|
|
||||||
|
ocrAsyncRunner.processDocument(docId, doc, userId);
|
||||||
|
|
||||||
|
verify(annotationService, times(2)).createOcrAnnotation(
|
||||||
|
eq(docId), any(CreateAnnotationDTO.class), eq(userId), eq("hash"), any());
|
||||||
|
verify(blockRepository, times(2)).save(any());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void processDocument_setsBlockSourceToOcr() {
|
||||||
|
UUID docId = UUID.randomUUID();
|
||||||
|
UUID userId = UUID.randomUUID();
|
||||||
|
Document doc = Document.builder().id(docId).filePath("test.pdf")
|
||||||
|
.fileHash("hash").scriptType(ScriptType.TYPEWRITER).build();
|
||||||
|
|
||||||
|
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
|
||||||
|
when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned");
|
||||||
|
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of(
|
||||||
|
new OcrBlockResult(0, 0.1, 0.1, 0.8, 0.04, null, "Test")));
|
||||||
|
DocumentAnnotation ann = DocumentAnnotation.builder().id(UUID.randomUUID()).build();
|
||||||
|
when(annotationService.createOcrAnnotation(any(), any(), any(), any(), any())).thenReturn(ann);
|
||||||
|
|
||||||
|
ocrAsyncRunner.processDocument(docId, doc, userId);
|
||||||
|
|
||||||
|
ArgumentCaptor<TranscriptionBlock> captor = ArgumentCaptor.forClass(TranscriptionBlock.class);
|
||||||
|
verify(blockRepository).save(captor.capture());
|
||||||
|
assertThat(captor.getValue().getSource()).isEqualTo(BlockSource.OCR);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void runSingleDocument_setsJobDone_onSuccess() {
|
||||||
|
UUID jobId = UUID.randomUUID();
|
||||||
|
UUID docId = UUID.randomUUID();
|
||||||
|
UUID userId = UUID.randomUUID();
|
||||||
|
OcrJob job = OcrJob.builder().id(jobId).totalDocuments(1).status(OcrJobStatus.PENDING).build();
|
||||||
|
Document doc = Document.builder().id(docId).filePath("test.pdf")
|
||||||
|
.fileHash("hash").scriptType(ScriptType.TYPEWRITER).build();
|
||||||
|
|
||||||
|
when(ocrJobRepository.findById(jobId)).thenReturn(Optional.of(job));
|
||||||
|
when(ocrJobRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||||
|
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||||
|
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
|
||||||
|
when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned");
|
||||||
|
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of());
|
||||||
|
|
||||||
|
ocrAsyncRunner.runSingleDocument(jobId, docId, userId);
|
||||||
|
|
||||||
|
assertThat(job.getStatus()).isEqualTo(OcrJobStatus.DONE);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void runSingleDocument_setsJobFailed_onError() {
|
||||||
|
UUID jobId = UUID.randomUUID();
|
||||||
|
UUID docId = UUID.randomUUID();
|
||||||
|
UUID userId = UUID.randomUUID();
|
||||||
|
OcrJob job = OcrJob.builder().id(jobId).totalDocuments(1).status(OcrJobStatus.PENDING).build();
|
||||||
|
Document doc = Document.builder().id(docId).filePath("test.pdf")
|
||||||
|
.fileHash("hash").scriptType(ScriptType.TYPEWRITER).build();
|
||||||
|
|
||||||
|
when(ocrJobRepository.findById(jobId)).thenReturn(Optional.of(job));
|
||||||
|
when(ocrJobRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||||
|
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||||
|
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
|
||||||
|
when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned");
|
||||||
|
when(ocrClient.extractBlocks(any(), any())).thenThrow(new RuntimeException("OCR failed"));
|
||||||
|
|
||||||
|
ocrAsyncRunner.runSingleDocument(jobId, docId, userId);
|
||||||
|
|
||||||
|
assertThat(job.getStatus()).isEqualTo(OcrJobStatus.FAILED);
|
||||||
|
assertThat(job.getErrorCount()).isEqualTo(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -12,24 +12,20 @@ import org.raddatz.familienarchiv.repository.OcrJobDocumentRepository;
|
|||||||
import org.raddatz.familienarchiv.repository.OcrJobRepository;
|
import org.raddatz.familienarchiv.repository.OcrJobRepository;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Optional;
|
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
|
|
||||||
import static org.assertj.core.api.Assertions.assertThat;
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||||
import static org.mockito.ArgumentMatchers.any;
|
import static org.mockito.ArgumentMatchers.any;
|
||||||
import static org.mockito.ArgumentMatchers.eq;
|
|
||||||
import static org.mockito.Mockito.*;
|
import static org.mockito.Mockito.*;
|
||||||
|
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
class OcrBatchServiceTest {
|
class OcrBatchServiceTest {
|
||||||
|
|
||||||
@Mock OcrService ocrService;
|
|
||||||
@Mock OcrHealthClient ocrHealthClient;
|
@Mock OcrHealthClient ocrHealthClient;
|
||||||
@Mock DocumentService documentService;
|
|
||||||
@Mock OcrJobRepository ocrJobRepository;
|
@Mock OcrJobRepository ocrJobRepository;
|
||||||
@Mock OcrJobDocumentRepository ocrJobDocumentRepository;
|
@Mock OcrJobDocumentRepository ocrJobDocumentRepository;
|
||||||
@Mock OcrProgressService ocrProgressService;
|
@Mock OcrAsyncRunner ocrAsyncRunner;
|
||||||
|
|
||||||
@InjectMocks OcrBatchService ocrBatchService;
|
@InjectMocks OcrBatchService ocrBatchService;
|
||||||
|
|
||||||
@@ -44,7 +40,7 @@ class OcrBatchServiceTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void startBatch_createsJobAndReturnsJobId() {
|
void startBatch_createsJobAndDispatchesAsync() {
|
||||||
UUID docId = UUID.randomUUID();
|
UUID docId = UUID.randomUUID();
|
||||||
UUID userId = UUID.randomUUID();
|
UUID userId = UUID.randomUUID();
|
||||||
UUID jobId = UUID.randomUUID();
|
UUID jobId = UUID.randomUUID();
|
||||||
@@ -56,87 +52,29 @@ class OcrBatchServiceTest {
|
|||||||
return job;
|
return job;
|
||||||
});
|
});
|
||||||
when(ocrJobDocumentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
when(ocrJobDocumentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||||
when(ocrJobRepository.findById(jobId)).thenReturn(Optional.of(
|
|
||||||
OcrJob.builder().id(jobId).totalDocuments(1).status(OcrJobStatus.PENDING).build()));
|
|
||||||
when(ocrJobDocumentRepository.findByJobIdOrderByCreatedAtAsc(jobId)).thenReturn(List.of(
|
|
||||||
OcrJobDocument.builder().jobId(jobId).documentId(docId).status(OcrDocumentStatus.PENDING).build()));
|
|
||||||
|
|
||||||
Document doc = Document.builder().id(docId).status(DocumentStatus.UPLOADED)
|
UUID result = ocrBatchService.startBatch(List.of(docId), userId);
|
||||||
.filePath("test.pdf").fileHash("hash").scriptType(ScriptType.TYPEWRITER).build();
|
|
||||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
|
||||||
|
|
||||||
UUID resultJobId = ocrBatchService.startBatch(List.of(docId), userId);
|
assertThat(result).isEqualTo(jobId);
|
||||||
|
verify(ocrAsyncRunner).runBatch(jobId, userId);
|
||||||
assertThat(resultJobId).isEqualTo(jobId);
|
|
||||||
verify(ocrService).processDocument(eq(docId), eq(doc), eq(userId));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void processBatchAsync_skipsPlaceholderDocuments() {
|
void startBatch_createsJobDocumentForEachId() {
|
||||||
UUID jobId = UUID.randomUUID();
|
UUID doc1 = UUID.randomUUID();
|
||||||
UUID uploadedId = UUID.randomUUID();
|
UUID doc2 = UUID.randomUUID();
|
||||||
UUID placeholderId = UUID.randomUUID();
|
|
||||||
UUID userId = UUID.randomUUID();
|
UUID userId = UUID.randomUUID();
|
||||||
|
|
||||||
OcrJob job = OcrJob.builder().id(jobId).totalDocuments(2).status(OcrJobStatus.PENDING).build();
|
when(ocrHealthClient.isHealthy()).thenReturn(true);
|
||||||
when(ocrJobRepository.findById(jobId)).thenReturn(Optional.of(job));
|
when(ocrJobRepository.save(any())).thenAnswer(inv -> {
|
||||||
when(ocrJobRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
OcrJob job = inv.getArgument(0);
|
||||||
|
job.setId(UUID.randomUUID());
|
||||||
|
return job;
|
||||||
|
});
|
||||||
when(ocrJobDocumentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
when(ocrJobDocumentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||||
|
|
||||||
OcrJobDocument uploadedJobDoc = OcrJobDocument.builder()
|
ocrBatchService.startBatch(List.of(doc1, doc2), userId);
|
||||||
.jobId(jobId).documentId(uploadedId).status(OcrDocumentStatus.PENDING).build();
|
|
||||||
OcrJobDocument placeholderJobDoc = OcrJobDocument.builder()
|
|
||||||
.jobId(jobId).documentId(placeholderId).status(OcrDocumentStatus.PENDING).build();
|
|
||||||
when(ocrJobDocumentRepository.findByJobIdOrderByCreatedAtAsc(jobId))
|
|
||||||
.thenReturn(List.of(uploadedJobDoc, placeholderJobDoc));
|
|
||||||
|
|
||||||
Document uploaded = Document.builder().id(uploadedId).status(DocumentStatus.UPLOADED)
|
verify(ocrJobDocumentRepository, times(2)).save(any());
|
||||||
.filePath("test.pdf").fileHash("hash").scriptType(ScriptType.TYPEWRITER).build();
|
|
||||||
Document placeholder = Document.builder().id(placeholderId).status(DocumentStatus.PLACEHOLDER).build();
|
|
||||||
when(documentService.getDocumentById(uploadedId)).thenReturn(uploaded);
|
|
||||||
when(documentService.getDocumentById(placeholderId)).thenReturn(placeholder);
|
|
||||||
|
|
||||||
ocrBatchService.processBatchAsync(jobId, userId);
|
|
||||||
|
|
||||||
verify(ocrService).processDocument(eq(uploadedId), eq(uploaded), eq(userId));
|
|
||||||
verify(ocrService, never()).processDocument(eq(placeholderId), any(), any());
|
|
||||||
assertThat(placeholderJobDoc.getStatus()).isEqualTo(OcrDocumentStatus.SKIPPED);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void processBatchAsync_continuesAfterSingleDocumentFailure() {
|
|
||||||
UUID jobId = UUID.randomUUID();
|
|
||||||
UUID failDocId = UUID.randomUUID();
|
|
||||||
UUID successDocId = UUID.randomUUID();
|
|
||||||
UUID userId = UUID.randomUUID();
|
|
||||||
|
|
||||||
OcrJob job = OcrJob.builder().id(jobId).totalDocuments(2).status(OcrJobStatus.PENDING).build();
|
|
||||||
when(ocrJobRepository.findById(jobId)).thenReturn(Optional.of(job));
|
|
||||||
when(ocrJobRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
|
||||||
when(ocrJobDocumentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
|
||||||
|
|
||||||
OcrJobDocument failJobDoc = OcrJobDocument.builder()
|
|
||||||
.jobId(jobId).documentId(failDocId).status(OcrDocumentStatus.PENDING).build();
|
|
||||||
OcrJobDocument successJobDoc = OcrJobDocument.builder()
|
|
||||||
.jobId(jobId).documentId(successDocId).status(OcrDocumentStatus.PENDING).build();
|
|
||||||
when(ocrJobDocumentRepository.findByJobIdOrderByCreatedAtAsc(jobId))
|
|
||||||
.thenReturn(List.of(failJobDoc, successJobDoc));
|
|
||||||
|
|
||||||
Document failDoc = Document.builder().id(failDocId).status(DocumentStatus.UPLOADED)
|
|
||||||
.filePath("fail.pdf").fileHash("hash1").scriptType(ScriptType.TYPEWRITER).build();
|
|
||||||
Document successDoc = Document.builder().id(successDocId).status(DocumentStatus.UPLOADED)
|
|
||||||
.filePath("success.pdf").fileHash("hash2").scriptType(ScriptType.TYPEWRITER).build();
|
|
||||||
when(documentService.getDocumentById(failDocId)).thenReturn(failDoc);
|
|
||||||
when(documentService.getDocumentById(successDocId)).thenReturn(successDoc);
|
|
||||||
|
|
||||||
doThrow(new RuntimeException("OCR failed")).when(ocrService)
|
|
||||||
.processDocument(eq(failDocId), any(), any());
|
|
||||||
|
|
||||||
ocrBatchService.processBatchAsync(jobId, userId);
|
|
||||||
|
|
||||||
verify(ocrService).processDocument(eq(successDocId), eq(successDoc), eq(userId));
|
|
||||||
assertThat(failJobDoc.getStatus()).isEqualTo(OcrDocumentStatus.FAILED);
|
|
||||||
assertThat(successJobDoc.getStatus()).isEqualTo(OcrDocumentStatus.DONE);
|
|
||||||
assertThat(job.getStatus()).isEqualTo(OcrJobStatus.DONE);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,50 +2,39 @@ package org.raddatz.familienarchiv.service;
|
|||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
import org.junit.jupiter.api.extension.ExtendWith;
|
import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
import org.mockito.ArgumentCaptor;
|
|
||||||
import org.mockito.InjectMocks;
|
import org.mockito.InjectMocks;
|
||||||
import org.mockito.Mock;
|
import org.mockito.Mock;
|
||||||
import org.mockito.junit.jupiter.MockitoExtension;
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
import org.mockito.ArgumentCaptor;
|
|
||||||
import org.raddatz.familienarchiv.dto.CreateAnnotationDTO;
|
|
||||||
import org.raddatz.familienarchiv.exception.DomainException;
|
import org.raddatz.familienarchiv.exception.DomainException;
|
||||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||||
import org.raddatz.familienarchiv.model.*;
|
import org.raddatz.familienarchiv.model.*;
|
||||||
import org.raddatz.familienarchiv.repository.OcrJobRepository;
|
import org.raddatz.familienarchiv.repository.OcrJobRepository;
|
||||||
import org.raddatz.familienarchiv.repository.TranscriptionBlockRepository;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
|
|
||||||
import static org.assertj.core.api.Assertions.assertThat;
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||||
import static org.mockito.ArgumentMatchers.any;
|
import static org.mockito.ArgumentMatchers.any;
|
||||||
import static org.mockito.ArgumentMatchers.eq;
|
|
||||||
import static org.mockito.Mockito.*;
|
import static org.mockito.Mockito.*;
|
||||||
import static org.springframework.http.HttpStatus.*;
|
import static org.springframework.http.HttpStatus.*;
|
||||||
|
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
class OcrServiceTest {
|
class OcrServiceTest {
|
||||||
|
|
||||||
@Mock OcrClient ocrClient;
|
|
||||||
@Mock OcrHealthClient ocrHealthClient;
|
@Mock OcrHealthClient ocrHealthClient;
|
||||||
@Mock DocumentService documentService;
|
@Mock DocumentService documentService;
|
||||||
@Mock TranscriptionService transcriptionService;
|
|
||||||
@Mock AnnotationService annotationService;
|
|
||||||
@Mock TranscriptionBlockRepository blockRepository;
|
|
||||||
@Mock OcrJobRepository ocrJobRepository;
|
@Mock OcrJobRepository ocrJobRepository;
|
||||||
@Mock FileService fileService;
|
@Mock OcrAsyncRunner ocrAsyncRunner;
|
||||||
|
|
||||||
@InjectMocks OcrService ocrService;
|
@InjectMocks OcrService ocrService;
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void startOcr_throwsBadRequest_whenDocumentIsPlaceholder() {
|
void startOcr_throwsBadRequest_whenDocumentIsPlaceholder() {
|
||||||
UUID docId = UUID.randomUUID();
|
UUID docId = UUID.randomUUID();
|
||||||
UUID userId = UUID.randomUUID();
|
|
||||||
Document doc = Document.builder().id(docId).status(DocumentStatus.PLACEHOLDER).build();
|
Document doc = Document.builder().id(docId).status(DocumentStatus.PLACEHOLDER).build();
|
||||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||||
|
|
||||||
assertThatThrownBy(() -> ocrService.startOcr(docId, null, userId))
|
assertThatThrownBy(() -> ocrService.startOcr(docId, null, UUID.randomUUID()))
|
||||||
.isInstanceOf(DomainException.class)
|
.isInstanceOf(DomainException.class)
|
||||||
.satisfies(e -> {
|
.satisfies(e -> {
|
||||||
DomainException de = (DomainException) e;
|
DomainException de = (DomainException) e;
|
||||||
@@ -57,150 +46,53 @@ class OcrServiceTest {
|
|||||||
@Test
|
@Test
|
||||||
void startOcr_throwsServiceUnavailable_whenOcrServiceIsDown() {
|
void startOcr_throwsServiceUnavailable_whenOcrServiceIsDown() {
|
||||||
UUID docId = UUID.randomUUID();
|
UUID docId = UUID.randomUUID();
|
||||||
UUID userId = UUID.randomUUID();
|
|
||||||
Document doc = Document.builder().id(docId).status(DocumentStatus.UPLOADED)
|
Document doc = Document.builder().id(docId).status(DocumentStatus.UPLOADED)
|
||||||
.filePath("documents/test.pdf").fileHash("hash123").build();
|
.filePath("test.pdf").build();
|
||||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||||
when(ocrHealthClient.isHealthy()).thenReturn(false);
|
when(ocrHealthClient.isHealthy()).thenReturn(false);
|
||||||
|
|
||||||
assertThatThrownBy(() -> ocrService.startOcr(docId, null, userId))
|
assertThatThrownBy(() -> ocrService.startOcr(docId, null, UUID.randomUUID()))
|
||||||
.isInstanceOf(DomainException.class)
|
.isInstanceOf(DomainException.class)
|
||||||
.satisfies(e -> {
|
.satisfies(e -> assertThat(((DomainException) e).getCode())
|
||||||
DomainException de = (DomainException) e;
|
.isEqualTo(ErrorCode.OCR_SERVICE_UNAVAILABLE));
|
||||||
assertThat(de.getCode()).isEqualTo(ErrorCode.OCR_SERVICE_UNAVAILABLE);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void startOcr_createsJobAndReturnsJobId() {
|
void startOcr_createsJobAndDispatchesAsync() {
|
||||||
UUID docId = UUID.randomUUID();
|
UUID docId = UUID.randomUUID();
|
||||||
UUID userId = UUID.randomUUID();
|
UUID userId = UUID.randomUUID();
|
||||||
UUID jobId = UUID.randomUUID();
|
UUID jobId = UUID.randomUUID();
|
||||||
Document doc = Document.builder().id(docId).status(DocumentStatus.UPLOADED)
|
Document doc = Document.builder().id(docId).status(DocumentStatus.UPLOADED)
|
||||||
.filePath("documents/test.pdf").fileHash("hash123")
|
.filePath("test.pdf").scriptType(ScriptType.TYPEWRITER).build();
|
||||||
.scriptType(ScriptType.TYPEWRITER).build();
|
|
||||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||||
when(ocrHealthClient.isHealthy()).thenReturn(true);
|
when(ocrHealthClient.isHealthy()).thenReturn(true);
|
||||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://minio/presigned");
|
|
||||||
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of());
|
|
||||||
when(ocrJobRepository.save(any())).thenAnswer(inv -> {
|
when(ocrJobRepository.save(any())).thenAnswer(inv -> {
|
||||||
OcrJob job = inv.getArgument(0);
|
OcrJob job = inv.getArgument(0);
|
||||||
job.setId(jobId);
|
job.setId(jobId);
|
||||||
return job;
|
return job;
|
||||||
});
|
});
|
||||||
|
|
||||||
UUID resultJobId = ocrService.startOcr(docId, ScriptType.TYPEWRITER, userId);
|
UUID result = ocrService.startOcr(docId, null, userId);
|
||||||
|
|
||||||
assertThat(resultJobId).isEqualTo(jobId);
|
assertThat(result).isEqualTo(jobId);
|
||||||
verify(ocrJobRepository, atLeastOnce()).save(any());
|
verify(ocrAsyncRunner).runSingleDocument(jobId, docId, userId);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void startOcr_setsScriptTypeOnDocument_whenProvided() {
|
void startOcr_setsScriptTypeOnDocument_whenProvided() {
|
||||||
UUID docId = UUID.randomUUID();
|
UUID docId = UUID.randomUUID();
|
||||||
UUID userId = UUID.randomUUID();
|
|
||||||
Document doc = Document.builder().id(docId).status(DocumentStatus.UPLOADED)
|
Document doc = Document.builder().id(docId).status(DocumentStatus.UPLOADED)
|
||||||
.filePath("documents/test.pdf").fileHash("hash123")
|
.filePath("test.pdf").scriptType(ScriptType.UNKNOWN).build();
|
||||||
.scriptType(ScriptType.UNKNOWN).build();
|
|
||||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||||
when(ocrHealthClient.isHealthy()).thenReturn(true);
|
when(ocrHealthClient.isHealthy()).thenReturn(true);
|
||||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://minio/presigned");
|
|
||||||
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of());
|
|
||||||
when(ocrJobRepository.save(any())).thenAnswer(inv -> {
|
when(ocrJobRepository.save(any())).thenAnswer(inv -> {
|
||||||
OcrJob job = inv.getArgument(0);
|
OcrJob job = inv.getArgument(0);
|
||||||
job.setId(UUID.randomUUID());
|
job.setId(UUID.randomUUID());
|
||||||
return job;
|
return job;
|
||||||
});
|
});
|
||||||
|
|
||||||
ocrService.startOcr(docId, ScriptType.HANDWRITING_LATIN, userId);
|
ocrService.startOcr(docId, ScriptType.HANDWRITING_LATIN, UUID.randomUUID());
|
||||||
|
|
||||||
assertThat(doc.getScriptType()).isEqualTo(ScriptType.HANDWRITING_LATIN);
|
assertThat(doc.getScriptType()).isEqualTo(ScriptType.HANDWRITING_LATIN);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
void startOcr_clearsExistingBlocks_beforeCreatingNew() {
|
|
||||||
UUID docId = UUID.randomUUID();
|
|
||||||
UUID userId = UUID.randomUUID();
|
|
||||||
Document doc = Document.builder().id(docId).status(DocumentStatus.UPLOADED)
|
|
||||||
.filePath("documents/test.pdf").fileHash("hash123")
|
|
||||||
.scriptType(ScriptType.TYPEWRITER).build();
|
|
||||||
TranscriptionBlock existingBlock = TranscriptionBlock.builder()
|
|
||||||
.id(UUID.randomUUID()).documentId(docId).build();
|
|
||||||
|
|
||||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
|
||||||
when(ocrHealthClient.isHealthy()).thenReturn(true);
|
|
||||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://minio/presigned");
|
|
||||||
when(transcriptionService.listBlocks(docId)).thenReturn(List.of(existingBlock));
|
|
||||||
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of(
|
|
||||||
new OcrBlockResult(0, 0.1, 0.1, 0.8, 0.04, null, "Hello")));
|
|
||||||
when(ocrJobRepository.save(any())).thenAnswer(inv -> {
|
|
||||||
OcrJob job = inv.getArgument(0);
|
|
||||||
job.setId(UUID.randomUUID());
|
|
||||||
return job;
|
|
||||||
});
|
|
||||||
DocumentAnnotation ann = DocumentAnnotation.builder().id(UUID.randomUUID()).build();
|
|
||||||
when(annotationService.createOcrAnnotation(any(), any(), any(), any(), any())).thenReturn(ann);
|
|
||||||
|
|
||||||
ocrService.startOcr(docId, null, userId);
|
|
||||||
|
|
||||||
verify(transcriptionService).deleteBlock(docId, existingBlock.getId());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void startOcr_createsAnnotationAndBlock_forEachOcrResult() {
|
|
||||||
UUID docId = UUID.randomUUID();
|
|
||||||
UUID userId = UUID.randomUUID();
|
|
||||||
Document doc = Document.builder().id(docId).status(DocumentStatus.UPLOADED)
|
|
||||||
.filePath("documents/test.pdf").fileHash("hash123")
|
|
||||||
.scriptType(ScriptType.TYPEWRITER).build();
|
|
||||||
|
|
||||||
OcrBlockResult block1 = new OcrBlockResult(0, 0.1, 0.1, 0.8, 0.04, null, "Line 1");
|
|
||||||
OcrBlockResult block2 = new OcrBlockResult(0, 0.1, 0.2, 0.8, 0.04, null, "Line 2");
|
|
||||||
|
|
||||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
|
||||||
when(ocrHealthClient.isHealthy()).thenReturn(true);
|
|
||||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://minio/presigned");
|
|
||||||
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
|
|
||||||
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of(block1, block2));
|
|
||||||
when(ocrJobRepository.save(any())).thenAnswer(inv -> {
|
|
||||||
OcrJob job = inv.getArgument(0);
|
|
||||||
job.setId(UUID.randomUUID());
|
|
||||||
return job;
|
|
||||||
});
|
|
||||||
DocumentAnnotation ann = DocumentAnnotation.builder().id(UUID.randomUUID()).build();
|
|
||||||
when(annotationService.createOcrAnnotation(any(), any(), any(), any(), any())).thenReturn(ann);
|
|
||||||
|
|
||||||
ocrService.startOcr(docId, null, userId);
|
|
||||||
|
|
||||||
verify(annotationService, times(2)).createOcrAnnotation(
|
|
||||||
eq(docId), any(CreateAnnotationDTO.class), eq(userId), eq("hash123"), any());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void startOcr_setsBlockSourceToOcr() {
|
|
||||||
UUID docId = UUID.randomUUID();
|
|
||||||
UUID userId = UUID.randomUUID();
|
|
||||||
Document doc = Document.builder().id(docId).status(DocumentStatus.UPLOADED)
|
|
||||||
.filePath("documents/test.pdf").fileHash("hash123")
|
|
||||||
.scriptType(ScriptType.TYPEWRITER).build();
|
|
||||||
OcrBlockResult block = new OcrBlockResult(0, 0.1, 0.1, 0.8, 0.04, null, "Test");
|
|
||||||
|
|
||||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
|
||||||
when(ocrHealthClient.isHealthy()).thenReturn(true);
|
|
||||||
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
|
|
||||||
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of(block));
|
|
||||||
when(ocrJobRepository.save(any())).thenAnswer(inv -> {
|
|
||||||
OcrJob job = inv.getArgument(0);
|
|
||||||
job.setId(UUID.randomUUID());
|
|
||||||
return job;
|
|
||||||
});
|
|
||||||
DocumentAnnotation ann = DocumentAnnotation.builder().id(UUID.randomUUID()).build();
|
|
||||||
when(annotationService.createOcrAnnotation(any(), any(), any(), any(), any())).thenReturn(ann);
|
|
||||||
|
|
||||||
ocrService.startOcr(docId, null, userId);
|
|
||||||
|
|
||||||
ArgumentCaptor<TranscriptionBlock> captor = ArgumentCaptor.forClass(TranscriptionBlock.class);
|
|
||||||
verify(blockRepository).save(captor.capture());
|
|
||||||
assertThat(captor.getValue().getSource()).isEqualTo(BlockSource.OCR);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ import DocumentViewer from '$lib/components/DocumentViewer.svelte';
|
|||||||
import TranscriptionEditView from '$lib/components/TranscriptionEditView.svelte';
|
import TranscriptionEditView from '$lib/components/TranscriptionEditView.svelte';
|
||||||
import TranscriptionReadView from '$lib/components/TranscriptionReadView.svelte';
|
import TranscriptionReadView from '$lib/components/TranscriptionReadView.svelte';
|
||||||
import TranscriptionPanelHeader from '$lib/components/TranscriptionPanelHeader.svelte';
|
import TranscriptionPanelHeader from '$lib/components/TranscriptionPanelHeader.svelte';
|
||||||
import OcrProgress from '$lib/components/OcrProgress.svelte';
|
|
||||||
import type { TranscriptionBlockData } from '$lib/types';
|
import type { TranscriptionBlockData } from '$lib/types';
|
||||||
|
|
||||||
let { data } = $props();
|
let { data } = $props();
|
||||||
@@ -58,7 +57,6 @@ let activeAnnotationId = $state<string | null>(null);
|
|||||||
let highlightBlockId = $state<string | null>(null);
|
let highlightBlockId = $state<string | null>(null);
|
||||||
let flashAnnotationId = $state<string | null>(null);
|
let flashAnnotationId = $state<string | null>(null);
|
||||||
let pdfStripExpanded = $state(false);
|
let pdfStripExpanded = $state(false);
|
||||||
let ocrJobId = $state<string | null>(null);
|
|
||||||
|
|
||||||
const prefersReducedMotion = $derived(
|
const prefersReducedMotion = $derived(
|
||||||
typeof window !== 'undefined' && window.matchMedia('(prefers-reduced-motion: reduce)').matches
|
typeof window !== 'undefined' && window.matchMedia('(prefers-reduced-motion: reduce)').matches
|
||||||
@@ -129,7 +127,11 @@ async function reviewToggle(blockId: string) {
|
|||||||
transcriptionBlocks = transcriptionBlocks.map((b) => (b.id === blockId ? updated : b));
|
transcriptionBlocks = transcriptionBlocks.map((b) => (b.id === blockId ? updated : b));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let ocrRunning = $state(false);
|
||||||
|
let ocrPollTimer = $state<ReturnType<typeof setInterval> | null>(null);
|
||||||
|
|
||||||
async function triggerOcr(scriptType: string) {
|
async function triggerOcr(scriptType: string) {
|
||||||
|
ocrRunning = true;
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`/api/documents/${doc.id}/ocr`, {
|
const res = await fetch(`/api/documents/${doc.id}/ocr`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
@@ -138,18 +140,35 @@ async function triggerOcr(scriptType: string) {
|
|||||||
});
|
});
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
const data = await res.json();
|
const data = await res.json();
|
||||||
ocrJobId = data.jobId;
|
pollOcrJob(data.jobId);
|
||||||
|
} else {
|
||||||
|
ocrRunning = false;
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error('Failed to trigger OCR:', e);
|
console.error('Failed to trigger OCR:', e);
|
||||||
|
ocrRunning = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function handleOcrDone() {
|
function pollOcrJob(jobId: string) {
|
||||||
ocrJobId = null;
|
if (ocrPollTimer) clearInterval(ocrPollTimer);
|
||||||
await loadTranscriptionBlocks();
|
ocrPollTimer = setInterval(async () => {
|
||||||
annotationReloadKey++;
|
try {
|
||||||
panelMode = transcriptionBlocks.length > 0 ? 'read' : 'edit';
|
const res = await fetch(`/api/ocr/jobs/${jobId}`);
|
||||||
|
if (!res.ok) return;
|
||||||
|
const job = await res.json();
|
||||||
|
if (job.status === 'DONE' || job.status === 'FAILED') {
|
||||||
|
if (ocrPollTimer) clearInterval(ocrPollTimer);
|
||||||
|
ocrPollTimer = null;
|
||||||
|
ocrRunning = false;
|
||||||
|
await loadTranscriptionBlocks();
|
||||||
|
annotationReloadKey++;
|
||||||
|
panelMode = transcriptionBlocks.length > 0 ? 'read' : 'edit';
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// polling is best-effort
|
||||||
|
}
|
||||||
|
}, 3000);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function createBlockFromDraw(rect: {
|
async function createBlockFromDraw(rect: {
|
||||||
@@ -232,28 +251,12 @@ function handleParagraphClick(annotationId: string) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function checkOcrStatus() {
|
// Load blocks when transcribe mode is entered and set default panel mode
|
||||||
if (!doc?.id) return;
|
|
||||||
try {
|
|
||||||
const res = await fetch(`/api/documents/${doc.id}/ocr-status`);
|
|
||||||
if (res.ok) {
|
|
||||||
const status = await res.json();
|
|
||||||
if (status.status === 'PENDING' || status.status === 'RUNNING') {
|
|
||||||
ocrJobId = status.jobId;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch {
|
|
||||||
// OCR status check is best-effort
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Load blocks and check OCR status when transcribe mode is entered
|
|
||||||
$effect(() => {
|
$effect(() => {
|
||||||
if (transcribeMode) {
|
if (transcribeMode) {
|
||||||
loadTranscriptionBlocks().then(() => {
|
loadTranscriptionBlocks().then(() => {
|
||||||
panelMode = transcriptionBlocks.length > 0 ? 'read' : 'edit';
|
panelMode = transcriptionBlocks.length > 0 ? 'read' : 'edit';
|
||||||
});
|
});
|
||||||
checkOcrStatus();
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -277,7 +280,10 @@ onMount(() => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
document.addEventListener('keydown', onKeyDown);
|
document.addEventListener('keydown', onKeyDown);
|
||||||
return () => document.removeEventListener('keydown', onKeyDown);
|
return () => {
|
||||||
|
document.removeEventListener('keydown', onKeyDown);
|
||||||
|
if (ocrPollTimer) clearInterval(ocrPollTimer);
|
||||||
|
};
|
||||||
});
|
});
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
@@ -353,9 +359,30 @@ onMount(() => {
|
|||||||
onClose={() => (transcribeMode = false)}
|
onClose={() => (transcribeMode = false)}
|
||||||
/>
|
/>
|
||||||
<div class="flex-1 overflow-y-auto">
|
<div class="flex-1 overflow-y-auto">
|
||||||
{#if ocrJobId}
|
{#if ocrRunning}
|
||||||
<div class="p-4">
|
<div class="flex flex-1 flex-col items-center justify-center px-6 py-12 text-center">
|
||||||
<OcrProgress jobId={ocrJobId} onDone={handleOcrDone} />
|
<svg
|
||||||
|
class="mb-4 h-8 w-8 animate-spin text-brand-mint"
|
||||||
|
viewBox="0 0 24 24"
|
||||||
|
fill="none"
|
||||||
|
>
|
||||||
|
<circle
|
||||||
|
class="opacity-25"
|
||||||
|
cx="12"
|
||||||
|
cy="12"
|
||||||
|
r="10"
|
||||||
|
stroke="currentColor"
|
||||||
|
stroke-width="4"
|
||||||
|
></circle>
|
||||||
|
<path
|
||||||
|
class="opacity-75"
|
||||||
|
fill="currentColor"
|
||||||
|
d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z"
|
||||||
|
></path>
|
||||||
|
</svg>
|
||||||
|
<p class="text-xs font-bold tracking-widest text-gray-400 uppercase">
|
||||||
|
{m.ocr_progress_heading()}
|
||||||
|
</p>
|
||||||
</div>
|
</div>
|
||||||
{:else if panelMode === 'read'}
|
{:else if panelMode === 'read'}
|
||||||
<TranscriptionReadView
|
<TranscriptionReadView
|
||||||
|
|||||||
Reference in New Issue
Block a user