refactor(ocr): route block lifecycle through TranscriptionService
OcrAsyncRunner was bypassing TranscriptionService — building blocks directly and calling blockRepository.save(), skipping sanitizeText() and saveVersion(). Also replaced N individual deleteBlock() calls with a single bulk deleteAllBlocksByDocument() for OCR re-runs. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -6,7 +6,6 @@ import org.raddatz.familienarchiv.dto.CreateAnnotationDTO;
|
||||
import org.raddatz.familienarchiv.model.*;
|
||||
import org.raddatz.familienarchiv.repository.OcrJobDocumentRepository;
|
||||
import org.raddatz.familienarchiv.repository.OcrJobRepository;
|
||||
import org.raddatz.familienarchiv.repository.TranscriptionBlockRepository;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@@ -26,7 +25,6 @@ public class OcrAsyncRunner {
|
||||
private final DocumentService documentService;
|
||||
private final TranscriptionService transcriptionService;
|
||||
private final AnnotationService annotationService;
|
||||
private final TranscriptionBlockRepository blockRepository;
|
||||
private final FileService fileService;
|
||||
private final OcrJobRepository ocrJobRepository;
|
||||
private final OcrJobDocumentRepository ocrJobDocumentRepository;
|
||||
@@ -194,10 +192,7 @@ public class OcrAsyncRunner {
|
||||
}
|
||||
|
||||
private void clearExistingBlocks(UUID documentId) {
|
||||
List<TranscriptionBlock> existing = transcriptionService.listBlocks(documentId);
|
||||
for (TranscriptionBlock block : existing) {
|
||||
transcriptionService.deleteBlock(documentId, block.getId());
|
||||
}
|
||||
transcriptionService.deleteAllBlocksByDocument(documentId);
|
||||
}
|
||||
|
||||
private void createTranscriptionBlocks(UUID documentId, List<OcrBlockResult> blocks,
|
||||
@@ -216,15 +211,7 @@ public class OcrAsyncRunner {
|
||||
DocumentAnnotation annotation = annotationService.createOcrAnnotation(
|
||||
documentId, annotationDTO, userId, fileHash, block.polygon());
|
||||
|
||||
TranscriptionBlock transcriptionBlock = TranscriptionBlock.builder()
|
||||
.annotationId(annotation.getId())
|
||||
.documentId(documentId)
|
||||
.text(block.text() != null ? block.text() : "")
|
||||
.sortOrder(sortOrder)
|
||||
.source(BlockSource.OCR)
|
||||
.createdBy(userId)
|
||||
.updatedBy(userId)
|
||||
.build();
|
||||
blockRepository.save(transcriptionBlock);
|
||||
transcriptionService.createOcrBlock(documentId, annotation.getId(),
|
||||
block.text(), sortOrder, userId);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ import org.raddatz.familienarchiv.dto.ReorderTranscriptionBlocksDTO;
|
||||
import org.raddatz.familienarchiv.dto.UpdateTranscriptionBlockDTO;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
import org.raddatz.familienarchiv.model.BlockSource;
|
||||
import org.raddatz.familienarchiv.model.Document;
|
||||
import org.raddatz.familienarchiv.model.DocumentAnnotation;
|
||||
import org.raddatz.familienarchiv.model.TranscriptionBlock;
|
||||
@@ -75,6 +76,24 @@ public class TranscriptionService {
|
||||
return saved;
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public TranscriptionBlock createOcrBlock(UUID documentId, UUID annotationId,
|
||||
String text, int sortOrder, UUID userId) {
|
||||
String sanitized = sanitizeText(text);
|
||||
TranscriptionBlock block = TranscriptionBlock.builder()
|
||||
.annotationId(annotationId)
|
||||
.documentId(documentId)
|
||||
.text(sanitized)
|
||||
.sortOrder(sortOrder)
|
||||
.source(BlockSource.OCR)
|
||||
.createdBy(userId)
|
||||
.updatedBy(userId)
|
||||
.build();
|
||||
TranscriptionBlock saved = blockRepository.save(block);
|
||||
saveVersion(saved, userId);
|
||||
return saved;
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public TranscriptionBlock updateBlock(UUID documentId, UUID blockId,
|
||||
UpdateTranscriptionBlockDTO dto, UUID userId) {
|
||||
@@ -106,6 +125,21 @@ public class TranscriptionService {
|
||||
blockId, annotationId, documentId);
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public void deleteAllBlocksByDocument(UUID documentId) {
|
||||
List<TranscriptionBlock> blocks = blockRepository.findByDocumentIdOrderBySortOrderAsc(documentId);
|
||||
if (blocks.isEmpty()) return;
|
||||
|
||||
List<UUID> annotationIds = blocks.stream()
|
||||
.map(TranscriptionBlock::getAnnotationId)
|
||||
.toList();
|
||||
|
||||
blockRepository.deleteAll(blocks);
|
||||
blockRepository.flush();
|
||||
annotationRepository.deleteAllById(annotationIds);
|
||||
log.info("Bulk-deleted {} transcription blocks for document {}", blocks.size(), documentId);
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public void reorderBlocks(UUID documentId, ReorderTranscriptionBlocksDTO dto) {
|
||||
List<UUID> blockIds = dto.getBlockIds();
|
||||
|
||||
@@ -2,7 +2,6 @@ package org.raddatz.familienarchiv.service;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.ArgumentCaptor;
|
||||
import org.mockito.InjectMocks;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
@@ -10,7 +9,6 @@ import org.raddatz.familienarchiv.dto.CreateAnnotationDTO;
|
||||
import org.raddatz.familienarchiv.model.*;
|
||||
import org.raddatz.familienarchiv.repository.OcrJobDocumentRepository;
|
||||
import org.raddatz.familienarchiv.repository.OcrJobRepository;
|
||||
import org.raddatz.familienarchiv.repository.TranscriptionBlockRepository;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
@@ -19,8 +17,7 @@ import java.util.UUID;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.eq;
|
||||
import static org.mockito.ArgumentMatchers.*;
|
||||
import static org.mockito.Mockito.*;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
@@ -30,7 +27,6 @@ class OcrAsyncRunnerTest {
|
||||
@Mock DocumentService documentService;
|
||||
@Mock TranscriptionService transcriptionService;
|
||||
@Mock AnnotationService annotationService;
|
||||
@Mock TranscriptionBlockRepository blockRepository;
|
||||
@Mock FileService fileService;
|
||||
@Mock OcrJobRepository ocrJobRepository;
|
||||
@Mock OcrJobDocumentRepository ocrJobDocumentRepository;
|
||||
@@ -42,61 +38,59 @@ class OcrAsyncRunnerTest {
|
||||
void processDocument_clearsExistingBlocks() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
TranscriptionBlock existing = TranscriptionBlock.builder()
|
||||
.id(UUID.randomUUID()).documentId(docId).build();
|
||||
Document doc = Document.builder().id(docId).filePath("test.pdf")
|
||||
.fileHash("hash").scriptType(ScriptType.TYPEWRITER).build();
|
||||
|
||||
when(transcriptionService.listBlocks(docId)).thenReturn(List.of(existing));
|
||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned");
|
||||
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of());
|
||||
|
||||
ocrAsyncRunner.processDocument(docId, doc, userId);
|
||||
|
||||
verify(transcriptionService).deleteBlock(docId, existing.getId());
|
||||
verify(transcriptionService).deleteAllBlocksByDocument(docId);
|
||||
}
|
||||
|
||||
@Test
|
||||
void processDocument_createsAnnotationAndBlock_forEachResult() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
UUID annId = UUID.randomUUID();
|
||||
Document doc = Document.builder().id(docId).filePath("test.pdf")
|
||||
.fileHash("hash").scriptType(ScriptType.TYPEWRITER).build();
|
||||
|
||||
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
|
||||
|
||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned");
|
||||
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of(
|
||||
new OcrBlockResult(0, 0.1, 0.1, 0.8, 0.04, null, "Line 1"),
|
||||
new OcrBlockResult(0, 0.1, 0.2, 0.8, 0.04, null, "Line 2")));
|
||||
DocumentAnnotation ann = DocumentAnnotation.builder().id(UUID.randomUUID()).build();
|
||||
DocumentAnnotation ann = DocumentAnnotation.builder().id(annId).build();
|
||||
when(annotationService.createOcrAnnotation(any(), any(), any(), any(), any())).thenReturn(ann);
|
||||
|
||||
ocrAsyncRunner.processDocument(docId, doc, userId);
|
||||
|
||||
verify(annotationService, times(2)).createOcrAnnotation(
|
||||
eq(docId), any(CreateAnnotationDTO.class), eq(userId), eq("hash"), any());
|
||||
verify(blockRepository, times(2)).save(any());
|
||||
verify(transcriptionService, times(2)).createOcrBlock(
|
||||
eq(docId), eq(annId), any(), anyInt(), eq(userId));
|
||||
}
|
||||
|
||||
@Test
|
||||
void processDocument_setsBlockSourceToOcr() {
|
||||
void processDocument_delegatesBlockCreationToTranscriptionService() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
UUID annId = UUID.randomUUID();
|
||||
Document doc = Document.builder().id(docId).filePath("test.pdf")
|
||||
.fileHash("hash").scriptType(ScriptType.TYPEWRITER).build();
|
||||
|
||||
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
|
||||
|
||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned");
|
||||
when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of(
|
||||
new OcrBlockResult(0, 0.1, 0.1, 0.8, 0.04, null, "Test")));
|
||||
DocumentAnnotation ann = DocumentAnnotation.builder().id(UUID.randomUUID()).build();
|
||||
DocumentAnnotation ann = DocumentAnnotation.builder().id(annId).build();
|
||||
when(annotationService.createOcrAnnotation(any(), any(), any(), any(), any())).thenReturn(ann);
|
||||
|
||||
ocrAsyncRunner.processDocument(docId, doc, userId);
|
||||
|
||||
ArgumentCaptor<TranscriptionBlock> captor = ArgumentCaptor.forClass(TranscriptionBlock.class);
|
||||
verify(blockRepository).save(captor.capture());
|
||||
assertThat(captor.getValue().getSource()).isEqualTo(BlockSource.OCR);
|
||||
verify(transcriptionService).createOcrBlock(docId, annId, "Test", 0, userId);
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -115,7 +109,7 @@ class OcrAsyncRunnerTest {
|
||||
when(ocrJobDocumentRepository.findByJobIdAndDocumentId(jobId, docId))
|
||||
.thenReturn(Optional.of(jobDoc));
|
||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
|
||||
|
||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned");
|
||||
doAnswer(inv -> {
|
||||
Consumer<OcrStreamEvent> handler = inv.getArgument(2);
|
||||
@@ -146,7 +140,7 @@ class OcrAsyncRunnerTest {
|
||||
when(ocrJobDocumentRepository.findByJobIdAndDocumentId(jobId, docId))
|
||||
.thenReturn(Optional.of(jobDoc));
|
||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
|
||||
|
||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned");
|
||||
doThrow(new RuntimeException("OCR failed")).when(ocrClient).streamBlocks(any(), any(), any());
|
||||
|
||||
@@ -174,7 +168,7 @@ class OcrAsyncRunnerTest {
|
||||
.thenReturn(Optional.of(jobDoc));
|
||||
when(ocrJobDocumentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
|
||||
|
||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned");
|
||||
when(annotationService.createOcrAnnotation(any(), any(), any(), any(), any())).thenReturn(ann);
|
||||
|
||||
@@ -217,7 +211,7 @@ class OcrAsyncRunnerTest {
|
||||
.thenReturn(Optional.of(jobDoc));
|
||||
when(ocrJobDocumentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
|
||||
|
||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned");
|
||||
|
||||
doAnswer(inv -> {
|
||||
@@ -253,7 +247,7 @@ class OcrAsyncRunnerTest {
|
||||
.thenReturn(Optional.of(jobDoc));
|
||||
when(ocrJobDocumentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0));
|
||||
when(documentService.getDocumentById(docId)).thenReturn(doc);
|
||||
when(transcriptionService.listBlocks(docId)).thenReturn(List.of());
|
||||
|
||||
when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned");
|
||||
|
||||
doAnswer(inv -> {
|
||||
|
||||
@@ -10,6 +10,7 @@ import org.raddatz.familienarchiv.dto.CreateTranscriptionBlockDTO;
|
||||
import org.raddatz.familienarchiv.dto.ReorderTranscriptionBlocksDTO;
|
||||
import org.raddatz.familienarchiv.dto.UpdateTranscriptionBlockDTO;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.model.BlockSource;
|
||||
import org.raddatz.familienarchiv.model.Document;
|
||||
import org.raddatz.familienarchiv.model.DocumentAnnotation;
|
||||
import org.raddatz.familienarchiv.model.TranscriptionBlock;
|
||||
@@ -26,8 +27,8 @@ import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.eq;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
import static org.mockito.Mockito.*;
|
||||
|
||||
import static org.springframework.http.HttpStatus.NOT_FOUND;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
@@ -99,6 +100,50 @@ class TranscriptionServiceTest {
|
||||
verify(versionRepository).save(any(TranscriptionBlockVersion.class));
|
||||
}
|
||||
|
||||
// ─── createOcrBlock ──────────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void createOcrBlock_createsBlockWithOcrSourceAndSavesVersion() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
UUID annotId = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
|
||||
when(blockRepository.save(any())).thenAnswer(inv -> {
|
||||
TranscriptionBlock b = inv.getArgument(0);
|
||||
b.setId(UUID.randomUUID());
|
||||
return b;
|
||||
});
|
||||
|
||||
TranscriptionBlock result = transcriptionService.createOcrBlock(
|
||||
docId, annotId, "OCR text", 3, userId);
|
||||
|
||||
assertThat(result.getAnnotationId()).isEqualTo(annotId);
|
||||
assertThat(result.getDocumentId()).isEqualTo(docId);
|
||||
assertThat(result.getText()).isEqualTo("OCR text");
|
||||
assertThat(result.getSortOrder()).isEqualTo(3);
|
||||
assertThat(result.getSource()).isEqualTo(BlockSource.OCR);
|
||||
assertThat(result.getCreatedBy()).isEqualTo(userId);
|
||||
verify(versionRepository).save(any(TranscriptionBlockVersion.class));
|
||||
}
|
||||
|
||||
@Test
|
||||
void createOcrBlock_sanitizesNullText() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
UUID annotId = UUID.randomUUID();
|
||||
UUID userId = UUID.randomUUID();
|
||||
|
||||
when(blockRepository.save(any())).thenAnswer(inv -> {
|
||||
TranscriptionBlock b = inv.getArgument(0);
|
||||
b.setId(UUID.randomUUID());
|
||||
return b;
|
||||
});
|
||||
|
||||
TranscriptionBlock result = transcriptionService.createOcrBlock(
|
||||
docId, annotId, null, 0, userId);
|
||||
|
||||
assertThat(result.getText()).isEmpty();
|
||||
}
|
||||
|
||||
// ─── updateBlock ─────────────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
@@ -168,6 +213,39 @@ class TranscriptionServiceTest {
|
||||
.satisfies(e -> assertThat(((DomainException) e).getStatus()).isEqualTo(NOT_FOUND));
|
||||
}
|
||||
|
||||
// ─── deleteAllBlocksByDocument ─────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void deleteAllBlocksByDocument_deletesAllBlocksAndAnnotations() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
UUID annId1 = UUID.randomUUID();
|
||||
UUID annId2 = UUID.randomUUID();
|
||||
|
||||
TranscriptionBlock block1 = TranscriptionBlock.builder()
|
||||
.id(UUID.randomUUID()).documentId(docId).annotationId(annId1).sortOrder(0).build();
|
||||
TranscriptionBlock block2 = TranscriptionBlock.builder()
|
||||
.id(UUID.randomUUID()).documentId(docId).annotationId(annId2).sortOrder(1).build();
|
||||
|
||||
when(blockRepository.findByDocumentIdOrderBySortOrderAsc(docId))
|
||||
.thenReturn(List.of(block1, block2));
|
||||
|
||||
transcriptionService.deleteAllBlocksByDocument(docId);
|
||||
|
||||
verify(blockRepository).deleteAll(List.of(block1, block2));
|
||||
verify(blockRepository).flush();
|
||||
verify(annotationRepository).deleteAllById(List.of(annId1, annId2));
|
||||
}
|
||||
|
||||
@Test
|
||||
void deleteAllBlocksByDocument_doesNothing_whenNoBlocksExist() {
|
||||
UUID docId = UUID.randomUUID();
|
||||
when(blockRepository.findByDocumentIdOrderBySortOrderAsc(docId)).thenReturn(List.of());
|
||||
|
||||
transcriptionService.deleteAllBlocksByDocument(docId);
|
||||
|
||||
verify(blockRepository, never()).deleteAll(any());
|
||||
}
|
||||
|
||||
// ─── reorderBlocks ───────────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
|
||||
Reference in New Issue
Block a user