feat: OCR pipeline with NDJSON streaming and real-time progress (#226, #227, #231) #229

Merged
marcel merged 74 commits from feat/issue-226-227-ocr-pipeline-polygon into main 2026-04-13 12:39:04 +02:00
2 changed files with 65 additions and 0 deletions
Showing only changes of commit e21d01e10b - Show all commits

View File

@@ -0,0 +1,14 @@
package org.raddatz.familienarchiv.service;
import java.util.List;
/**
 * Closed set of events describing a streamed OCR run.
 *
 * <p>The interface is sealed and its only implementations are the records nested below, so a
 * {@code switch} over an {@code OcrStreamEvent} can cover every variant without a
 * {@code default} branch.
 *
 * <p>NOTE(review): the order in which a producer emits these events (presumably {@code Start},
 * then {@code Page}/{@code Error} per page, then {@code Done}) is not established in this file.
 * Confirm against the emitting service.
 */
public sealed interface OcrStreamEvent {

    /**
     * Carries the number of pages of the run.
     *
     * @param totalPages total page count
     */
    record Start(int totalPages) implements OcrStreamEvent {}

    /**
     * Carries the OCR blocks recognised for one page.
     *
     * @param pageNumber number of the page the blocks belong to
     *                   (NOTE(review): looks zero-based, confirm with the producer)
     * @param blocks     blocks of that page; stored as an unmodifiable snapshot, never {@code null}
     */
    record Page(int pageNumber, List<OcrBlockResult> blocks) implements OcrStreamEvent {

        /**
         * Takes a defensive, unmodifiable copy of {@code blocks} so that later changes to the
         * caller's list cannot alter an event that has already been handed out.
         * A {@code null} list is normalised to an empty one.
         *
         * @throws NullPointerException if {@code blocks} contains a {@code null} element
         */
        public Page {
            blocks = blocks == null ? List.of() : List.copyOf(blocks);
        }
    }

    /**
     * Carries a failure message associated with one page.
     *
     * <p>Inside this interface the simple name {@code Error} refers to this record and shadows
     * {@link java.lang.Error}; code outside should keep using the qualified
     * {@code OcrStreamEvent.Error}.
     *
     * @param pageNumber number of the affected page
     * @param message    description of the failure
     */
    record Error(int pageNumber, String message) implements OcrStreamEvent {}

    /**
     * Carries the summary counters of a run.
     *
     * @param totalBlocks  total number of blocks
     * @param skippedPages number of pages that were skipped
     */
    record Done(int totalBlocks, int skippedPages) implements OcrStreamEvent {}
}

View File

@@ -0,0 +1,51 @@
package org.raddatz.familienarchiv.service;
import org.junit.jupiter.api.Test;
import java.util.List;
import static org.assertj.core.api.Assertions.assertThat;
class OcrStreamEventTest {
@Test
void startRecordHoldsTotalPages() {
var start = new OcrStreamEvent.Start(5);
assertThat(start.totalPages()).isEqualTo(5);
assertThat(start).isInstanceOf(OcrStreamEvent.class);
}
@Test
void pageRecordHoldsBlocksAndPageNumber() {
var block = new OcrBlockResult(0, 0.1, 0.2, 0.8, 0.1, null, "Test");
var page = new OcrStreamEvent.Page(0, List.of(block));
assertThat(page.pageNumber()).isEqualTo(0);
assertThat(page.blocks()).hasSize(1);
}
@Test
void errorRecordHoldsPageAndMessage() {
var error = new OcrStreamEvent.Error(2, "failed");
assertThat(error.pageNumber()).isEqualTo(2);
assertThat(error.message()).isEqualTo("failed");
}
@Test
void doneRecordHoldsTotalBlocksAndSkippedPages() {
var done = new OcrStreamEvent.Done(12, 2);
assertThat(done.totalBlocks()).isEqualTo(12);
assertThat(done.skippedPages()).isEqualTo(2);
}
@Test
void patternMatchingWorksOnSealedInterface() {
OcrStreamEvent event = new OcrStreamEvent.Start(3);
String result = switch (event) {
case OcrStreamEvent.Start s -> "start:" + s.totalPages();
case OcrStreamEvent.Page p -> "page:" + p.pageNumber();
case OcrStreamEvent.Error e -> "error:" + e.pageNumber();
case OcrStreamEvent.Done d -> "done:" + d.totalBlocks();
};
assertThat(result).isEqualTo("start:3");
}
}