refactor(ocr): make single-document OCR async, fix circular dependency

OcrService → OcrAsyncRunner was circular. Fixed by moving all OCR processing logic (processDocument, clearExistingBlocks, createBlocks) into OcrAsyncRunner. OcrService is now a thin entry point that validates, creates the job, and dispatches to OcrAsyncRunner. Architecture: - OcrService: validates document, checks health, creates OcrJob, delegates - OcrAsyncRunner: @Async processDocument + runSingleDocument + runBatch - OcrBatchService: creates job + job documents, delegates to OcrAsyncRunner - No circular dependencies Single-document OCR is now async (returns jobId immediately). Frontend polls GET /api/ocr/jobs/{jobId} every 3s until DONE/FAILED. 816 backend tests pass, 687 frontend tests pass. Refs #226 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-12 22:55:52 +02:00
parent 741979304c
commit dd175d09e2
7 changed files with 388 additions and 359 deletions
--- a/backend/src/main/java/org/raddatz/familienarchiv/service/OcrAsyncRunner.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/service/OcrAsyncRunner.java
@@ -0,0 +1,156 @@
+package org.raddatz.familienarchiv.service;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.raddatz.familienarchiv.dto.CreateAnnotationDTO;
+import org.raddatz.familienarchiv.model.*;
+import org.raddatz.familienarchiv.repository.OcrJobDocumentRepository;
+import org.raddatz.familienarchiv.repository.OcrJobRepository;
+import org.raddatz.familienarchiv.repository.TranscriptionBlockRepository;
+import org.springframework.scheduling.annotation.Async;
+import org.springframework.stereotype.Component;
+
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+
+@Component
+@RequiredArgsConstructor
+@Slf4j
+public class OcrAsyncRunner {
+
+    private static final String OCR_ANNOTATION_COLOR = "#00C7B1";
+
+    private final OcrClient ocrClient;
+    private final DocumentService documentService;
+    private final TranscriptionService transcriptionService;
+    private final AnnotationService annotationService;
+    private final TranscriptionBlockRepository blockRepository;
+    private final FileService fileService;
+    private final OcrJobRepository ocrJobRepository;
+    private final OcrJobDocumentRepository ocrJobDocumentRepository;
+    private final OcrProgressService ocrProgressService;
+
+    @Async
+    public void runSingleDocument(UUID jobId, UUID documentId, UUID userId) {
+        OcrJob job = ocrJobRepository.findById(jobId).orElse(null);
+        if (job == null) return;
+
+        job.setStatus(OcrJobStatus.RUNNING);
+        ocrJobRepository.save(job);
+
+        Document doc = documentService.getDocumentById(documentId);
+
+        try {
+            processDocument(documentId, doc, userId);
+            job.setStatus(OcrJobStatus.DONE);
+            job.setProcessedDocuments(1);
+        } catch (Exception e) {
+            log.error("OCR processing failed for document {}", documentId, e);
+            job.setStatus(OcrJobStatus.FAILED);
+            job.setErrorCount(1);
+        }
+
+        ocrJobRepository.save(job);
+    }
+
+    @Async
+    public void runBatch(UUID jobId, UUID userId) {
+        OcrJob job = ocrJobRepository.findById(jobId).orElse(null);
+        if (job == null) return;
+
+        job.setStatus(OcrJobStatus.RUNNING);
+        ocrJobRepository.save(job);
+
+        List<OcrJobDocument> jobDocs = ocrJobDocumentRepository.findByJobIdOrderByCreatedAtAsc(jobId);
+
+        for (OcrJobDocument jobDoc : jobDocs) {
+            Document doc = documentService.getDocumentById(jobDoc.getDocumentId());
+
+            if (doc.getStatus() == DocumentStatus.PLACEHOLDER) {
+                jobDoc.setStatus(OcrDocumentStatus.SKIPPED);
+                ocrJobDocumentRepository.save(jobDoc);
+                job.setSkippedCount(job.getSkippedCount() + 1);
+                ocrJobRepository.save(job);
+                ocrProgressService.emit(jobId, "document", Map.of(
+                        "documentId", jobDoc.getDocumentId(),
+                        "status", "SKIPPED",
+                        "processed", job.getProcessedDocuments(),
+                        "total", job.getTotalDocuments()));
+                continue;
+            }
+
+            jobDoc.setStatus(OcrDocumentStatus.RUNNING);
+            ocrJobDocumentRepository.save(jobDoc);
+
+            try {
+                processDocument(jobDoc.getDocumentId(), doc, userId);
+                jobDoc.setStatus(OcrDocumentStatus.DONE);
+                job.setProcessedDocuments(job.getProcessedDocuments() + 1);
+            } catch (Exception e) {
+                log.error("OCR batch: failed document {}", jobDoc.getDocumentId(), e);
+                jobDoc.setStatus(OcrDocumentStatus.FAILED);
+                jobDoc.setErrorMessage(e.getMessage());
+                job.setErrorCount(job.getErrorCount() + 1);
+            }
+
+            ocrJobDocumentRepository.save(jobDoc);
+            ocrJobRepository.save(job);
+
+            ocrProgressService.emit(jobId, "document", Map.of(
+                    "documentId", jobDoc.getDocumentId(),
+                    "status", jobDoc.getStatus().name(),
+                    "processed", job.getProcessedDocuments(),
+                    "total", job.getTotalDocuments()));
+        }
+
+        job.setStatus(OcrJobStatus.DONE);
+        ocrJobRepository.save(job);
+
+        ocrProgressService.emit(jobId, "done", Map.of(
+                "processed", job.getProcessedDocuments(),
+                "errors", job.getErrorCount(),
+                "skipped", job.getSkippedCount()));
+        ocrProgressService.complete(jobId);
+    }
+
+    void processDocument(UUID documentId, Document doc, UUID userId) {
+        clearExistingBlocks(documentId);
+
+        String pdfUrl = fileService.generatePresignedUrl(doc.getFilePath());
+        List<OcrBlockResult> blocks = ocrClient.extractBlocks(pdfUrl, doc.getScriptType());
+        createTranscriptionBlocks(documentId, blocks, userId, doc.getFileHash());
+    }
+
+    private void clearExistingBlocks(UUID documentId) {
+        List<TranscriptionBlock> existing = transcriptionService.listBlocks(documentId);
+        for (TranscriptionBlock block : existing) {
+            transcriptionService.deleteBlock(documentId, block.getId());
+        }
+    }
+
+    private void createTranscriptionBlocks(UUID documentId, List<OcrBlockResult> blocks,
+                                            UUID userId, String fileHash) {
+        for (int i = 0; i < blocks.size(); i++) {
+            OcrBlockResult block = blocks.get(i);
+
+            CreateAnnotationDTO annotationDTO = new CreateAnnotationDTO(
+                    block.pageNumber(), block.x(), block.y(),
+                    block.width(), block.height(), OCR_ANNOTATION_COLOR);
+
+            DocumentAnnotation annotation = annotationService.createOcrAnnotation(
+                    documentId, annotationDTO, userId, fileHash, block.polygon());
+
+            TranscriptionBlock transcriptionBlock = TranscriptionBlock.builder()
+                    .annotationId(annotation.getId())
+                    .documentId(documentId)
+                    .text(block.text() != null ? block.text() : "")
+                    .sortOrder(i)
+                    .source(BlockSource.OCR)
+                    .createdBy(userId)
+                    .updatedBy(userId)
+                    .build();
+            blockRepository.save(transcriptionBlock);
+        }
+    }
+}
--- a/backend/src/main/java/org/raddatz/familienarchiv/service/OcrBatchService.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/service/OcrBatchService.java
@@ -7,11 +7,9 @@ import org.raddatz.familienarchiv.exception.ErrorCode;
 import org.raddatz.familienarchiv.model.*;
 import org.raddatz.familienarchiv.repository.OcrJobDocumentRepository;
 import org.raddatz.familienarchiv.repository.OcrJobRepository;
-import org.springframework.scheduling.annotation.Async;
 import org.springframework.stereotype.Service;

 import java.util.List;
-import java.util.Map;
 import java.util.UUID;

@Service
@@ -19,12 +17,10 @@ import java.util.UUID;
@Slf4j
 public class OcrBatchService {

-    private final OcrService ocrService;
    private final OcrHealthClient ocrHealthClient;
-    private final DocumentService documentService;
    private final OcrJobRepository ocrJobRepository;
    private final OcrJobDocumentRepository ocrJobDocumentRepository;
-    private final OcrProgressService ocrProgressService;
+    private final OcrAsyncRunner ocrAsyncRunner;

    public UUID startBatch(List<UUID> documentIds, UUID userId) {
        if (!ocrHealthClient.isHealthy()) {
@@ -48,67 +44,7 @@ public class OcrBatchService {
            ocrJobDocumentRepository.save(jobDoc);
        }

-        processBatchAsync(job.getId(), userId);
+        ocrAsyncRunner.runBatch(job.getId(), userId);
        return job.getId();
    }
-
-    @Async
-    void processBatchAsync(UUID jobId, UUID userId) {
-        OcrJob job = ocrJobRepository.findById(jobId).orElse(null);
-        if (job == null) return;
-
-        job.setStatus(OcrJobStatus.RUNNING);
-        ocrJobRepository.save(job);
-
-        List<OcrJobDocument> jobDocs = ocrJobDocumentRepository.findByJobIdOrderByCreatedAtAsc(jobId);
-
-        for (OcrJobDocument jobDoc : jobDocs) {
-            Document doc = documentService.getDocumentById(jobDoc.getDocumentId());
-
-            if (doc.getStatus() == DocumentStatus.PLACEHOLDER) {
-                jobDoc.setStatus(OcrDocumentStatus.SKIPPED);
-                ocrJobDocumentRepository.save(jobDoc);
-                job.setSkippedCount(job.getSkippedCount() + 1);
-                ocrJobRepository.save(job);
-                ocrProgressService.emit(jobId, "document", Map.of(
-                        "documentId", jobDoc.getDocumentId(),
-                        "status", "SKIPPED",
-                        "processed", job.getProcessedDocuments(),
-                        "total", job.getTotalDocuments()));
-                continue;
-            }
-
-            jobDoc.setStatus(OcrDocumentStatus.RUNNING);
-            ocrJobDocumentRepository.save(jobDoc);
-
-            try {
-                ocrService.processDocument(jobDoc.getDocumentId(), doc, userId);
-                jobDoc.setStatus(OcrDocumentStatus.DONE);
-                job.setProcessedDocuments(job.getProcessedDocuments() + 1);
-            } catch (Exception e) {
-                log.error("OCR batch: failed document {}", jobDoc.getDocumentId(), e);
-                jobDoc.setStatus(OcrDocumentStatus.FAILED);
-                jobDoc.setErrorMessage(e.getMessage());
-                job.setErrorCount(job.getErrorCount() + 1);
-            }
-
-            ocrJobDocumentRepository.save(jobDoc);
-            ocrJobRepository.save(job);
-
-            ocrProgressService.emit(jobId, "document", Map.of(
-                    "documentId", jobDoc.getDocumentId(),
-                    "status", jobDoc.getStatus().name(),
-                    "processed", job.getProcessedDocuments(),
-                    "total", job.getTotalDocuments()));
-        }
-
-        job.setStatus(OcrJobStatus.DONE);
-        ocrJobRepository.save(job);
-
-        ocrProgressService.emit(jobId, "done", Map.of(
-                "processed", job.getProcessedDocuments(),
-                "errors", job.getErrorCount(),
-                "skipped", job.getSkippedCount()));
-        ocrProgressService.complete(jobId);
-    }
 }
--- a/backend/src/main/java/org/raddatz/familienarchiv/service/OcrService.java
+++ b/backend/src/main/java/org/raddatz/familienarchiv/service/OcrService.java
@@ -2,16 +2,12 @@ package org.raddatz.familienarchiv.service;

 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
-import org.raddatz.familienarchiv.dto.CreateAnnotationDTO;
 import org.raddatz.familienarchiv.exception.DomainException;
 import org.raddatz.familienarchiv.exception.ErrorCode;
 import org.raddatz.familienarchiv.model.*;
 import org.raddatz.familienarchiv.repository.OcrJobRepository;
-import org.raddatz.familienarchiv.repository.TranscriptionBlockRepository;
 import org.springframework.stereotype.Service;
-import org.springframework.transaction.annotation.Transactional;

-import java.util.List;
 import java.util.UUID;

@Service
@@ -19,18 +15,11 @@ import java.util.UUID;
@Slf4j
 public class OcrService {

-    private static final String OCR_ANNOTATION_COLOR = "#00C7B1";
-
-    private final OcrClient ocrClient;
    private final OcrHealthClient ocrHealthClient;
    private final DocumentService documentService;
-    private final TranscriptionService transcriptionService;
-    private final AnnotationService annotationService;
-    private final TranscriptionBlockRepository blockRepository;
    private final OcrJobRepository ocrJobRepository;
-    private final FileService fileService;
+    private final OcrAsyncRunner ocrAsyncRunner;

-    @Transactional
    public UUID startOcr(UUID documentId, ScriptType scriptTypeOverride, UUID userId) {
        Document doc = documentService.getDocumentById(documentId);

@@ -51,62 +40,11 @@ public class OcrService {
        OcrJob job = OcrJob.builder()
                .totalDocuments(1)
                .createdBy(userId)
-                .status(OcrJobStatus.RUNNING)
+                .status(OcrJobStatus.PENDING)
                .build();
        job = ocrJobRepository.save(job);

-        try {
-            processDocument(documentId, doc, userId);
-            job.setStatus(OcrJobStatus.DONE);
-            job.setProcessedDocuments(1);
-        } catch (Exception e) {
-            log.error("OCR processing failed for document {}", documentId, e);
-            job.setStatus(OcrJobStatus.FAILED);
-            job.setErrorCount(1);
-        }
-
-        ocrJobRepository.save(job);
+        ocrAsyncRunner.runSingleDocument(job.getId(), documentId, userId);
        return job.getId();
    }
-
-    void processDocument(UUID documentId, Document doc, UUID userId) {
-        clearExistingBlocks(documentId);
-
-        String pdfUrl = fileService.generatePresignedUrl(doc.getFilePath());
-        List<OcrBlockResult> blocks = ocrClient.extractBlocks(pdfUrl, doc.getScriptType());
-        createTranscriptionBlocks(documentId, blocks, userId, doc.getFileHash());
-    }
-
-    private void clearExistingBlocks(UUID documentId) {
-        List<TranscriptionBlock> existing = transcriptionService.listBlocks(documentId);
-        for (TranscriptionBlock block : existing) {
-            transcriptionService.deleteBlock(documentId, block.getId());
-        }
-    }
-
-    private void createTranscriptionBlocks(UUID documentId, List<OcrBlockResult> blocks,
-                                            UUID userId, String fileHash) {
-        for (int i = 0; i < blocks.size(); i++) {
-            OcrBlockResult block = blocks.get(i);
-
-            CreateAnnotationDTO annotationDTO = new CreateAnnotationDTO(
-                    block.pageNumber(), block.x(), block.y(),
-                    block.width(), block.height(), OCR_ANNOTATION_COLOR);
-
-            DocumentAnnotation annotation = annotationService.createOcrAnnotation(
-                    documentId, annotationDTO, userId, fileHash, block.polygon());
-
-            TranscriptionBlock transcriptionBlock = TranscriptionBlock.builder()
-                    .annotationId(annotation.getId())
-                    .documentId(documentId)
-                    .text(block.text() != null ? block.text() : "")
-                    .sortOrder(i)
-                    .source(BlockSource.OCR)
-                    .createdBy(userId)
-                    .updatedBy(userId)
-                    .build();
-            blockRepository.save(transcriptionBlock);
-        }
-    }
-
 }