docs(ocr): add Admin OCR overview & model-detail UI spec
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -9,10 +9,12 @@ import org.raddatz.familienarchiv.repository.OcrJobRepository;
|
|||||||
import org.springframework.scheduling.annotation.Async;
|
import org.springframework.scheduling.annotation.Async;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
|
|
||||||
@Component
|
@Component
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
@@ -29,6 +31,7 @@ public class OcrAsyncRunner {
|
|||||||
private final OcrJobRepository ocrJobRepository;
|
private final OcrJobRepository ocrJobRepository;
|
||||||
private final OcrJobDocumentRepository ocrJobDocumentRepository;
|
private final OcrJobDocumentRepository ocrJobDocumentRepository;
|
||||||
private final OcrProgressService ocrProgressService;
|
private final OcrProgressService ocrProgressService;
|
||||||
|
private final SenderModelService senderModelService;
|
||||||
|
|
||||||
@Async
|
@Async
|
||||||
public void runSingleDocument(UUID jobId, UUID documentId, UUID userId) {
|
public void runSingleDocument(UUID jobId, UUID documentId, UUID userId) {
|
||||||
@@ -68,12 +71,18 @@ public class OcrAsyncRunner {
|
|||||||
|
|
||||||
String pdfUrl = fileService.generatePresignedUrl(doc.getFilePath());
|
String pdfUrl = fileService.generatePresignedUrl(doc.getFilePath());
|
||||||
|
|
||||||
|
String senderModelPath = null;
|
||||||
|
if (doc.getSender() != null && doc.getScriptType() == ScriptType.HANDWRITING_KURRENT) {
|
||||||
|
senderModelPath = senderModelService.maybeGetModelPath(doc.getSender().getId()).orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
AtomicInteger blockCounter = new AtomicInteger(0);
|
AtomicInteger blockCounter = new AtomicInteger(0);
|
||||||
AtomicInteger currentPage = new AtomicInteger(0);
|
AtomicInteger currentPage = new AtomicInteger(0);
|
||||||
AtomicInteger skippedPages = new AtomicInteger(0);
|
AtomicInteger skippedPages = new AtomicInteger(0);
|
||||||
AtomicInteger totalPages = new AtomicInteger(0);
|
AtomicInteger totalPages = new AtomicInteger(0);
|
||||||
|
|
||||||
ocrClient.streamBlocks(pdfUrl, doc.getScriptType(), regions, event -> {
|
final String finalSenderModelPath = senderModelPath;
|
||||||
|
ocrClient.streamBlocks(pdfUrl, doc.getScriptType(), regions, finalSenderModelPath, event -> {
|
||||||
switch (event) {
|
switch (event) {
|
||||||
case OcrStreamEvent.Start start -> {
|
case OcrStreamEvent.Start start -> {
|
||||||
totalPages.set(start.totalPages());
|
totalPages.set(start.totalPages());
|
||||||
@@ -207,7 +216,25 @@ public class OcrAsyncRunner {
|
|||||||
clearExistingBlocks(documentId);
|
clearExistingBlocks(documentId);
|
||||||
|
|
||||||
String pdfUrl = fileService.generatePresignedUrl(doc.getFilePath());
|
String pdfUrl = fileService.generatePresignedUrl(doc.getFilePath());
|
||||||
List<OcrBlockResult> blocks = ocrClient.extractBlocks(pdfUrl, doc.getScriptType());
|
|
||||||
|
String senderModelPath = null;
|
||||||
|
if (doc.getSender() != null && doc.getScriptType() == ScriptType.HANDWRITING_KURRENT) {
|
||||||
|
senderModelPath = senderModelService.maybeGetModelPath(doc.getSender().getId()).orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
final AtomicReference<List<OcrBlockResult>> blocksRef = new AtomicReference<>();
|
||||||
|
final String finalSenderModelPath = senderModelPath;
|
||||||
|
ocrClient.streamBlocks(pdfUrl, doc.getScriptType(), null, finalSenderModelPath, event -> {
|
||||||
|
switch (event) {
|
||||||
|
case OcrStreamEvent.Page page -> {
|
||||||
|
blocksRef.compareAndSet(null, new ArrayList<>());
|
||||||
|
blocksRef.get().addAll(page.blocks());
|
||||||
|
}
|
||||||
|
default -> {}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
List<OcrBlockResult> blocks = blocksRef.get() != null ? blocksRef.get() : List.of();
|
||||||
createTranscriptionBlocks(documentId, blocks, userId, doc.getFileHash());
|
createTranscriptionBlocks(documentId, blocks, userId, doc.getFileHash());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
1101
docs/specs/ocr-admin-spec.html
Normal file
1101
docs/specs/ocr-admin-spec.html
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user