docs(ocr): add Admin OCR overview & model-detail UI spec
Some checks failed
CI / Unit & Component Tests (push) Failing after 2m36s
CI / OCR Service Tests (push) Successful in 27s
CI / Backend Unit Tests (push) Failing after 1m22s

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-17 19:11:07 +02:00
parent c5e6ed922b
commit 78eca8e9a1
2 changed files with 1130 additions and 2 deletions

View File

@@ -9,10 +9,12 @@ import org.raddatz.familienarchiv.repository.OcrJobRepository;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Component;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
@Component
@RequiredArgsConstructor
@@ -29,6 +31,7 @@ public class OcrAsyncRunner {
private final OcrJobRepository ocrJobRepository;
private final OcrJobDocumentRepository ocrJobDocumentRepository;
private final OcrProgressService ocrProgressService;
private final SenderModelService senderModelService;
@Async
public void runSingleDocument(UUID jobId, UUID documentId, UUID userId) {
@@ -68,12 +71,18 @@ public class OcrAsyncRunner {
String pdfUrl = fileService.generatePresignedUrl(doc.getFilePath());
String senderModelPath = null;
if (doc.getSender() != null && doc.getScriptType() == ScriptType.HANDWRITING_KURRENT) {
senderModelPath = senderModelService.maybeGetModelPath(doc.getSender().getId()).orElse(null);
}
AtomicInteger blockCounter = new AtomicInteger(0);
AtomicInteger currentPage = new AtomicInteger(0);
AtomicInteger skippedPages = new AtomicInteger(0);
AtomicInteger totalPages = new AtomicInteger(0);
ocrClient.streamBlocks(pdfUrl, doc.getScriptType(), regions, event -> {
final String finalSenderModelPath = senderModelPath;
ocrClient.streamBlocks(pdfUrl, doc.getScriptType(), regions, finalSenderModelPath, event -> {
switch (event) {
case OcrStreamEvent.Start start -> {
totalPages.set(start.totalPages());
@@ -207,7 +216,25 @@ public class OcrAsyncRunner {
clearExistingBlocks(documentId);
String pdfUrl = fileService.generatePresignedUrl(doc.getFilePath());
List<OcrBlockResult> blocks = ocrClient.extractBlocks(pdfUrl, doc.getScriptType());
String senderModelPath = null;
if (doc.getSender() != null && doc.getScriptType() == ScriptType.HANDWRITING_KURRENT) {
senderModelPath = senderModelService.maybeGetModelPath(doc.getSender().getId()).orElse(null);
}
final AtomicReference<List<OcrBlockResult>> blocksRef = new AtomicReference<>();
final String finalSenderModelPath = senderModelPath;
ocrClient.streamBlocks(pdfUrl, doc.getScriptType(), null, finalSenderModelPath, event -> {
switch (event) {
case OcrStreamEvent.Page page -> {
blocksRef.compareAndSet(null, new ArrayList<>());
blocksRef.get().addAll(page.blocks());
}
default -> {}
}
});
List<OcrBlockResult> blocks = blocksRef.get() != null ? blocksRef.get() : List.of();
createTranscriptionBlocks(documentId, blocks, userId, doc.getFileHash());
}

File diff suppressed because it is too large Load Diff