docs(ocr): add Admin OCR overview & model-detail UI spec
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -9,10 +9,12 @@ import org.raddatz.familienarchiv.repository.OcrJobRepository;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
@@ -29,6 +31,7 @@ public class OcrAsyncRunner {
|
||||
private final OcrJobRepository ocrJobRepository;
|
||||
private final OcrJobDocumentRepository ocrJobDocumentRepository;
|
||||
private final OcrProgressService ocrProgressService;
|
||||
private final SenderModelService senderModelService;
|
||||
|
||||
@Async
|
||||
public void runSingleDocument(UUID jobId, UUID documentId, UUID userId) {
|
||||
@@ -68,12 +71,18 @@ public class OcrAsyncRunner {
|
||||
|
||||
String pdfUrl = fileService.generatePresignedUrl(doc.getFilePath());
|
||||
|
||||
String senderModelPath = null;
|
||||
if (doc.getSender() != null && doc.getScriptType() == ScriptType.HANDWRITING_KURRENT) {
|
||||
senderModelPath = senderModelService.maybeGetModelPath(doc.getSender().getId()).orElse(null);
|
||||
}
|
||||
|
||||
AtomicInteger blockCounter = new AtomicInteger(0);
|
||||
AtomicInteger currentPage = new AtomicInteger(0);
|
||||
AtomicInteger skippedPages = new AtomicInteger(0);
|
||||
AtomicInteger totalPages = new AtomicInteger(0);
|
||||
|
||||
ocrClient.streamBlocks(pdfUrl, doc.getScriptType(), regions, event -> {
|
||||
final String finalSenderModelPath = senderModelPath;
|
||||
ocrClient.streamBlocks(pdfUrl, doc.getScriptType(), regions, finalSenderModelPath, event -> {
|
||||
switch (event) {
|
||||
case OcrStreamEvent.Start start -> {
|
||||
totalPages.set(start.totalPages());
|
||||
@@ -207,7 +216,25 @@ public class OcrAsyncRunner {
|
||||
clearExistingBlocks(documentId);
|
||||
|
||||
String pdfUrl = fileService.generatePresignedUrl(doc.getFilePath());
|
||||
List<OcrBlockResult> blocks = ocrClient.extractBlocks(pdfUrl, doc.getScriptType());
|
||||
|
||||
String senderModelPath = null;
|
||||
if (doc.getSender() != null && doc.getScriptType() == ScriptType.HANDWRITING_KURRENT) {
|
||||
senderModelPath = senderModelService.maybeGetModelPath(doc.getSender().getId()).orElse(null);
|
||||
}
|
||||
|
||||
final AtomicReference<List<OcrBlockResult>> blocksRef = new AtomicReference<>();
|
||||
final String finalSenderModelPath = senderModelPath;
|
||||
ocrClient.streamBlocks(pdfUrl, doc.getScriptType(), null, finalSenderModelPath, event -> {
|
||||
switch (event) {
|
||||
case OcrStreamEvent.Page page -> {
|
||||
blocksRef.compareAndSet(null, new ArrayList<>());
|
||||
blocksRef.get().addAll(page.blocks());
|
||||
}
|
||||
default -> {}
|
||||
}
|
||||
});
|
||||
|
||||
List<OcrBlockResult> blocks = blocksRef.get() != null ? blocksRef.get() : List.of();
|
||||
createTranscriptionBlocks(documentId, blocks, userId, doc.getFileHash());
|
||||
}
|
||||
|
||||
|
||||
1101
docs/specs/ocr-admin-spec.html
Normal file
1101
docs/specs/ocr-admin-spec.html
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user