feat(ocr): full OCR pipeline with polygon annotations, training, and guided mode #232
@@ -104,8 +104,8 @@ public class TranscriptionService {
|
||||
public TranscriptionBlock upsertGuidedBlock(UUID documentId, UUID annotationId,
|
||||
String text, UUID userId) {
|
||||
return blockRepository.findByAnnotationId(annotationId).map(existing -> {
|
||||
if (existing.getSource() == BlockSource.MANUAL) {
|
||||
return existing; // never overwrite manual transcription
|
||||
if (existing.getSource() == BlockSource.MANUAL && !existing.getText().isBlank()) {
|
||||
return existing; // never overwrite non-empty manual transcription
|
||||
}
|
||||
existing.setText(sanitizeText(text));
|
||||
existing.setUpdatedBy(userId);
|
||||
|
||||
@@ -74,7 +74,7 @@ class TranscriptionServiceGuidedTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
void upsertGuidedBlock_doesNotOverwriteManualBlock() {
|
||||
void upsertGuidedBlock_doesNotOverwriteNonEmptyManualBlock() {
|
||||
TranscriptionBlock manual = TranscriptionBlock.builder()
|
||||
.id(UUID.randomUUID())
|
||||
.annotationId(annId)
|
||||
@@ -90,4 +90,22 @@ class TranscriptionServiceGuidedTest {
|
||||
assertThat(result.getText()).isEqualTo("manually written");
|
||||
verify(blockRepository, never()).save(any());
|
||||
}
|
||||
|
||||
/*
 * Checks that upsertGuidedBlock replaces the text of an already existing block
 * whose source is MANUAL when that block's text is the empty string, and that
 * the repository's save is invoked as part of the call.
 */
@Test
|
||||
void upsertGuidedBlock_fillsEmptyManualBlock_withOcrText() {
|
||||
// Arrange: an existing MANUAL block for annId/docId whose text is empty.
TranscriptionBlock emptyManual = TranscriptionBlock.builder()
|
||||
.id(UUID.randomUUID())
|
||||
.annotationId(annId)
|
||||
.documentId(docId)
|
||||
.text("")
|
||||
.source(BlockSource.MANUAL)
|
||||
.sortOrder(0)
|
||||
.build();
|
||||
// The lookup by annotation id returns that block, so the call under test
// takes the "block already exists" path rather than creating a new one.
when(blockRepository.findByAnnotationId(annId)).thenReturn(Optional.of(emptyManual));
|
||||
|
||||
// Act: submit new text for the same annotation.
TranscriptionBlock result = service.upsertGuidedBlock(docId, annId, "ocr result", userId);
|
||||
|
||||
// Assert: the returned block carries the submitted text.
assertThat(result.getText()).isEqualTo("ocr result");
|
||||
// A save call is expected; any() means the saved argument is not inspected.
// NOTE(review): presumably Mockito's verify/any (static imports not visible here) - confirm.
verify(blockRepository).save(any());
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user