feat: OCR pipeline with NDJSON streaming and real-time progress (#226, #227, #231) #229

Merged
marcel merged 74 commits from feat/issue-226-227-ocr-pipeline-polygon into main 2026-04-13 12:39:04 +02:00
Showing only changes of commit 6823973429 - Show all commits

View File

@@ -162,25 +162,28 @@ public class OcrAsyncRunner {
private void createTranscriptionBlocks(UUID documentId, List<OcrBlockResult> blocks,
UUID userId, String fileHash) {
for (int i = 0; i < blocks.size(); i++) {
OcrBlockResult block = blocks.get(i);
CreateAnnotationDTO annotationDTO = new CreateAnnotationDTO(
block.pageNumber(), block.x(), block.y(),
block.width(), block.height(), OCR_ANNOTATION_COLOR);
DocumentAnnotation annotation = annotationService.createOcrAnnotation(
documentId, annotationDTO, userId, fileHash, block.polygon());
TranscriptionBlock transcriptionBlock = TranscriptionBlock.builder()
.annotationId(annotation.getId())
.documentId(documentId)
.text(block.text() != null ? block.text() : "")
.sortOrder(i)
.source(BlockSource.OCR)
.createdBy(userId)
.updatedBy(userId)
.build();
blockRepository.save(transcriptionBlock);
createSingleBlock(documentId, blocks.get(i), userId, fileHash, i);
}
}
void createSingleBlock(UUID documentId, OcrBlockResult block,
UUID userId, String fileHash, int sortOrder) {
CreateAnnotationDTO annotationDTO = new CreateAnnotationDTO(
block.pageNumber(), block.x(), block.y(),
block.width(), block.height(), OCR_ANNOTATION_COLOR);
DocumentAnnotation annotation = annotationService.createOcrAnnotation(
documentId, annotationDTO, userId, fileHash, block.polygon());
TranscriptionBlock transcriptionBlock = TranscriptionBlock.builder()
.annotationId(annotation.getId())
.documentId(documentId)
.text(block.text() != null ? block.text() : "")
.sortOrder(sortOrder)
.source(BlockSource.OCR)
.createdBy(userId)
.updatedBy(userId)
.build();
blockRepository.save(transcriptionBlock);
}
}