diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/OcrAsyncRunner.java b/backend/src/main/java/org/raddatz/familienarchiv/service/OcrAsyncRunner.java index 5dbe4c7d..9100f58e 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/OcrAsyncRunner.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/OcrAsyncRunner.java @@ -6,7 +6,6 @@ import org.raddatz.familienarchiv.dto.CreateAnnotationDTO; import org.raddatz.familienarchiv.model.*; import org.raddatz.familienarchiv.repository.OcrJobDocumentRepository; import org.raddatz.familienarchiv.repository.OcrJobRepository; -import org.raddatz.familienarchiv.repository.TranscriptionBlockRepository; import org.springframework.scheduling.annotation.Async; import org.springframework.stereotype.Component; @@ -26,7 +25,6 @@ public class OcrAsyncRunner { private final DocumentService documentService; private final TranscriptionService transcriptionService; private final AnnotationService annotationService; - private final TranscriptionBlockRepository blockRepository; private final FileService fileService; private final OcrJobRepository ocrJobRepository; private final OcrJobDocumentRepository ocrJobDocumentRepository; @@ -194,10 +192,7 @@ public class OcrAsyncRunner { } private void clearExistingBlocks(UUID documentId) { - List existing = transcriptionService.listBlocks(documentId); - for (TranscriptionBlock block : existing) { - transcriptionService.deleteBlock(documentId, block.getId()); - } + transcriptionService.deleteAllBlocksByDocument(documentId); } private void createTranscriptionBlocks(UUID documentId, List blocks, @@ -216,15 +211,7 @@ public class OcrAsyncRunner { DocumentAnnotation annotation = annotationService.createOcrAnnotation( documentId, annotationDTO, userId, fileHash, block.polygon()); - TranscriptionBlock transcriptionBlock = TranscriptionBlock.builder() - .annotationId(annotation.getId()) - .documentId(documentId) - .text(block.text() != null ? block.text() : "") - .sortOrder(sortOrder) - .source(BlockSource.OCR) - .createdBy(userId) - .updatedBy(userId) - .build(); - blockRepository.save(transcriptionBlock); + transcriptionService.createOcrBlock(documentId, annotation.getId(), + block.text(), sortOrder, userId); } } diff --git a/backend/src/main/java/org/raddatz/familienarchiv/service/TranscriptionService.java b/backend/src/main/java/org/raddatz/familienarchiv/service/TranscriptionService.java index 1f8126c1..c93c98a5 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/service/TranscriptionService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/service/TranscriptionService.java @@ -8,6 +8,7 @@ import org.raddatz.familienarchiv.dto.ReorderTranscriptionBlocksDTO; import org.raddatz.familienarchiv.dto.UpdateTranscriptionBlockDTO; import org.raddatz.familienarchiv.exception.DomainException; import org.raddatz.familienarchiv.exception.ErrorCode; +import org.raddatz.familienarchiv.model.BlockSource; import org.raddatz.familienarchiv.model.Document; import org.raddatz.familienarchiv.model.DocumentAnnotation; import org.raddatz.familienarchiv.model.TranscriptionBlock; @@ -75,6 +76,24 @@ public class TranscriptionService { return saved; } + @Transactional + public TranscriptionBlock createOcrBlock(UUID documentId, UUID annotationId, + String text, int sortOrder, UUID userId) { + String sanitized = sanitizeText(text); + TranscriptionBlock block = TranscriptionBlock.builder() + .annotationId(annotationId) + .documentId(documentId) + .text(sanitized) + .sortOrder(sortOrder) + .source(BlockSource.OCR) + .createdBy(userId) + .updatedBy(userId) + .build(); + TranscriptionBlock saved = blockRepository.save(block); + saveVersion(saved, userId); + return saved; + } + @Transactional public TranscriptionBlock updateBlock(UUID documentId, UUID blockId, UpdateTranscriptionBlockDTO dto, UUID userId) { @@ -106,6 +125,21 @@ public class TranscriptionService { blockId, annotationId, documentId); } + @Transactional + public void deleteAllBlocksByDocument(UUID documentId) { + List blocks = blockRepository.findByDocumentIdOrderBySortOrderAsc(documentId); + if (blocks.isEmpty()) return; + + List annotationIds = blocks.stream() + .map(TranscriptionBlock::getAnnotationId) + .toList(); + + blockRepository.deleteAll(blocks); + blockRepository.flush(); + annotationRepository.deleteAllById(annotationIds); + log.info("Bulk-deleted {} transcription blocks for document {}", blocks.size(), documentId); + } + @Transactional public void reorderBlocks(UUID documentId, ReorderTranscriptionBlocksDTO dto) { List blockIds = dto.getBlockIds(); diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/OcrAsyncRunnerTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/OcrAsyncRunnerTest.java index 018befc9..4c580c19 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/service/OcrAsyncRunnerTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/service/OcrAsyncRunnerTest.java @@ -2,7 +2,6 @@ package org.raddatz.familienarchiv.service; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.ArgumentCaptor; import org.mockito.InjectMocks; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; @@ -10,7 +9,6 @@ import org.raddatz.familienarchiv.dto.CreateAnnotationDTO; import org.raddatz.familienarchiv.model.*; import org.raddatz.familienarchiv.repository.OcrJobDocumentRepository; import org.raddatz.familienarchiv.repository.OcrJobRepository; -import org.raddatz.familienarchiv.repository.TranscriptionBlockRepository; import java.util.ArrayList; import java.util.List; @@ -19,8 +17,7 @@ import java.util.UUID; import java.util.function.Consumer; import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.ArgumentMatchers.*; import static org.mockito.Mockito.*; @ExtendWith(MockitoExtension.class) @@ -30,7 +27,6 @@ class OcrAsyncRunnerTest { @Mock DocumentService documentService; @Mock TranscriptionService transcriptionService; @Mock AnnotationService annotationService; - @Mock TranscriptionBlockRepository blockRepository; @Mock FileService fileService; @Mock OcrJobRepository ocrJobRepository; @Mock OcrJobDocumentRepository ocrJobDocumentRepository; @@ -42,61 +38,59 @@ class OcrAsyncRunnerTest { void processDocument_clearsExistingBlocks() { UUID docId = UUID.randomUUID(); UUID userId = UUID.randomUUID(); - TranscriptionBlock existing = TranscriptionBlock.builder() - .id(UUID.randomUUID()).documentId(docId).build(); Document doc = Document.builder().id(docId).filePath("test.pdf") .fileHash("hash").scriptType(ScriptType.TYPEWRITER).build(); - when(transcriptionService.listBlocks(docId)).thenReturn(List.of(existing)); when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned"); when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of()); ocrAsyncRunner.processDocument(docId, doc, userId); - verify(transcriptionService).deleteBlock(docId, existing.getId()); + verify(transcriptionService).deleteAllBlocksByDocument(docId); } @Test void processDocument_createsAnnotationAndBlock_forEachResult() { UUID docId = UUID.randomUUID(); UUID userId = UUID.randomUUID(); + UUID annId = UUID.randomUUID(); Document doc = Document.builder().id(docId).filePath("test.pdf") .fileHash("hash").scriptType(ScriptType.TYPEWRITER).build(); - when(transcriptionService.listBlocks(docId)).thenReturn(List.of()); + when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned"); when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of( new OcrBlockResult(0, 0.1, 0.1, 0.8, 0.04, null, "Line 1"), new OcrBlockResult(0, 0.1, 0.2, 0.8, 0.04, null, "Line 2"))); - DocumentAnnotation ann = DocumentAnnotation.builder().id(UUID.randomUUID()).build(); + DocumentAnnotation ann = DocumentAnnotation.builder().id(annId).build(); when(annotationService.createOcrAnnotation(any(), any(), any(), any(), any())).thenReturn(ann); ocrAsyncRunner.processDocument(docId, doc, userId); verify(annotationService, times(2)).createOcrAnnotation( eq(docId), any(CreateAnnotationDTO.class), eq(userId), eq("hash"), any()); - verify(blockRepository, times(2)).save(any()); + verify(transcriptionService, times(2)).createOcrBlock( + eq(docId), eq(annId), any(), anyInt(), eq(userId)); } @Test - void processDocument_setsBlockSourceToOcr() { + void processDocument_delegatesBlockCreationToTranscriptionService() { UUID docId = UUID.randomUUID(); UUID userId = UUID.randomUUID(); + UUID annId = UUID.randomUUID(); Document doc = Document.builder().id(docId).filePath("test.pdf") .fileHash("hash").scriptType(ScriptType.TYPEWRITER).build(); - when(transcriptionService.listBlocks(docId)).thenReturn(List.of()); + when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned"); when(ocrClient.extractBlocks(any(), any())).thenReturn(List.of( new OcrBlockResult(0, 0.1, 0.1, 0.8, 0.04, null, "Test"))); - DocumentAnnotation ann = DocumentAnnotation.builder().id(UUID.randomUUID()).build(); + DocumentAnnotation ann = DocumentAnnotation.builder().id(annId).build(); when(annotationService.createOcrAnnotation(any(), any(), any(), any(), any())).thenReturn(ann); ocrAsyncRunner.processDocument(docId, doc, userId); - ArgumentCaptor captor = ArgumentCaptor.forClass(TranscriptionBlock.class); - verify(blockRepository).save(captor.capture()); - assertThat(captor.getValue().getSource()).isEqualTo(BlockSource.OCR); + verify(transcriptionService).createOcrBlock(docId, annId, "Test", 0, userId); } @Test @@ -115,7 +109,7 @@ class OcrAsyncRunnerTest { when(ocrJobDocumentRepository.findByJobIdAndDocumentId(jobId, docId)) .thenReturn(Optional.of(jobDoc)); when(documentService.getDocumentById(docId)).thenReturn(doc); - when(transcriptionService.listBlocks(docId)).thenReturn(List.of()); + when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned"); doAnswer(inv -> { Consumer handler = inv.getArgument(2); @@ -146,7 +140,7 @@ class OcrAsyncRunnerTest { when(ocrJobDocumentRepository.findByJobIdAndDocumentId(jobId, docId)) .thenReturn(Optional.of(jobDoc)); when(documentService.getDocumentById(docId)).thenReturn(doc); - when(transcriptionService.listBlocks(docId)).thenReturn(List.of()); + when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned"); doThrow(new RuntimeException("OCR failed")).when(ocrClient).streamBlocks(any(), any(), any()); @@ -174,7 +168,7 @@ class OcrAsyncRunnerTest { .thenReturn(Optional.of(jobDoc)); when(ocrJobDocumentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0)); when(documentService.getDocumentById(docId)).thenReturn(doc); - when(transcriptionService.listBlocks(docId)).thenReturn(List.of()); + when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned"); when(annotationService.createOcrAnnotation(any(), any(), any(), any(), any())).thenReturn(ann); @@ -217,7 +211,7 @@ class OcrAsyncRunnerTest { .thenReturn(Optional.of(jobDoc)); when(ocrJobDocumentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0)); when(documentService.getDocumentById(docId)).thenReturn(doc); - when(transcriptionService.listBlocks(docId)).thenReturn(List.of()); + when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned"); doAnswer(inv -> { @@ -253,7 +247,7 @@ class OcrAsyncRunnerTest { .thenReturn(Optional.of(jobDoc)); when(ocrJobDocumentRepository.save(any())).thenAnswer(inv -> inv.getArgument(0)); when(documentService.getDocumentById(docId)).thenReturn(doc); - when(transcriptionService.listBlocks(docId)).thenReturn(List.of()); + when(fileService.generatePresignedUrl(any())).thenReturn("http://presigned"); doAnswer(inv -> { diff --git a/backend/src/test/java/org/raddatz/familienarchiv/service/TranscriptionServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/service/TranscriptionServiceTest.java index f25a884b..f8ca7753 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/service/TranscriptionServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/service/TranscriptionServiceTest.java @@ -10,6 +10,7 @@ import org.raddatz.familienarchiv.dto.CreateTranscriptionBlockDTO; import org.raddatz.familienarchiv.dto.ReorderTranscriptionBlocksDTO; import org.raddatz.familienarchiv.dto.UpdateTranscriptionBlockDTO; import org.raddatz.familienarchiv.exception.DomainException; +import org.raddatz.familienarchiv.model.BlockSource; import org.raddatz.familienarchiv.model.Document; import org.raddatz.familienarchiv.model.DocumentAnnotation; import org.raddatz.familienarchiv.model.TranscriptionBlock; @@ -26,8 +27,8 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; +import static org.mockito.Mockito.*; + import static org.springframework.http.HttpStatus.NOT_FOUND; @ExtendWith(MockitoExtension.class) @@ -99,6 +100,50 @@ class TranscriptionServiceTest { verify(versionRepository).save(any(TranscriptionBlockVersion.class)); } + // ─── createOcrBlock ────────────────────────────────────────────────────────── + + @Test + void createOcrBlock_createsBlockWithOcrSourceAndSavesVersion() { + UUID docId = UUID.randomUUID(); + UUID annotId = UUID.randomUUID(); + UUID userId = UUID.randomUUID(); + + when(blockRepository.save(any())).thenAnswer(inv -> { + TranscriptionBlock b = inv.getArgument(0); + b.setId(UUID.randomUUID()); + return b; + }); + + TranscriptionBlock result = transcriptionService.createOcrBlock( + docId, annotId, "OCR text", 3, userId); + + assertThat(result.getAnnotationId()).isEqualTo(annotId); + assertThat(result.getDocumentId()).isEqualTo(docId); + assertThat(result.getText()).isEqualTo("OCR text"); + assertThat(result.getSortOrder()).isEqualTo(3); + assertThat(result.getSource()).isEqualTo(BlockSource.OCR); + assertThat(result.getCreatedBy()).isEqualTo(userId); + verify(versionRepository).save(any(TranscriptionBlockVersion.class)); + } + + @Test + void createOcrBlock_sanitizesNullText() { + UUID docId = UUID.randomUUID(); + UUID annotId = UUID.randomUUID(); + UUID userId = UUID.randomUUID(); + + when(blockRepository.save(any())).thenAnswer(inv -> { + TranscriptionBlock b = inv.getArgument(0); + b.setId(UUID.randomUUID()); + return b; + }); + + TranscriptionBlock result = transcriptionService.createOcrBlock( + docId, annotId, null, 0, userId); + + assertThat(result.getText()).isEmpty(); + } + // ─── updateBlock ───────────────────────────────────────────────────────────── @Test @@ -168,6 +213,39 @@ class TranscriptionServiceTest { .satisfies(e -> assertThat(((DomainException) e).getStatus()).isEqualTo(NOT_FOUND)); } + // ─── deleteAllBlocksByDocument ───────────────────────────────────────────── + + @Test + void deleteAllBlocksByDocument_deletesAllBlocksAndAnnotations() { + UUID docId = UUID.randomUUID(); + UUID annId1 = UUID.randomUUID(); + UUID annId2 = UUID.randomUUID(); + + TranscriptionBlock block1 = TranscriptionBlock.builder() + .id(UUID.randomUUID()).documentId(docId).annotationId(annId1).sortOrder(0).build(); + TranscriptionBlock block2 = TranscriptionBlock.builder() + .id(UUID.randomUUID()).documentId(docId).annotationId(annId2).sortOrder(1).build(); + + when(blockRepository.findByDocumentIdOrderBySortOrderAsc(docId)) + .thenReturn(List.of(block1, block2)); + + transcriptionService.deleteAllBlocksByDocument(docId); + + verify(blockRepository).deleteAll(List.of(block1, block2)); + verify(blockRepository).flush(); + verify(annotationRepository).deleteAllById(List.of(annId1, annId2)); + } + + @Test + void deleteAllBlocksByDocument_doesNothing_whenNoBlocksExist() { + UUID docId = UUID.randomUUID(); + when(blockRepository.findByDocumentIdOrderBySortOrderAsc(docId)).thenReturn(List.of()); + + transcriptionService.deleteAllBlocksByDocument(docId); + + verify(blockRepository, never()).deleteAll(any()); + } + // ─── reorderBlocks ─────────────────────────────────────────────────────────── @Test