feat(ocr): add default streamBlocks method to OcrClient interface

The default method synthesizes Start/Page/Done events from
extractBlocks() results, providing backward compatibility for
implementations that don't support streaming natively.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-13 10:01:26 +02:00
parent e21d01e10b
commit 641e91d5a3
2 changed files with 81 additions and 0 deletions

View File

@@ -0,0 +1,55 @@
package org.raddatz.familienarchiv.service;
import org.junit.jupiter.api.Test;
import org.raddatz.familienarchiv.model.ScriptType;
import java.util.ArrayList;
import java.util.List;
import static org.assertj.core.api.Assertions.assertThat;
/**
 * Tests for the default {@code streamBlocks} method on {@link OcrClient}.
 *
 * <p>Each test builds an {@code OcrClient} from a lambda that returns a fixed
 * list of blocks, calls {@code streamBlocks} on it, and inspects the events
 * handed to the callback.
 */
class OcrClientDefaultStreamTest {

    /** URL passed to the client; the lambda-based clients in these tests ignore it. */
    private static final String PDF_URL = "http://test";

    /**
     * Three blocks on two pages must yield, in order: a Start event reporting
     * two pages, one Page event per page carrying that page's blocks, and a
     * Done event reporting three blocks and no skipped pages.
     */
    @Test
    void defaultStreamBlocksSynthesizesEventsFromExtractBlocks() {
        OcrClient client = (pdfUrl, scriptType) -> List.of(
                new OcrBlockResult(0, 0.1, 0.1, 0.8, 0.04, null, "Line 1"),
                new OcrBlockResult(0, 0.1, 0.2, 0.8, 0.04, null, "Line 2"),
                new OcrBlockResult(1, 0.1, 0.1, 0.8, 0.04, null, "Line 3"));

        List<OcrStreamEvent> received = collectEvents(client);

        assertThat(received).hasSize(4);

        // Event 0: stream start announcing the page count.
        assertThat(received.get(0)).isInstanceOfSatisfying(
                OcrStreamEvent.Start.class,
                start -> assertThat(start.totalPages()).isEqualTo(2));

        // Event 1: first page with its two blocks.
        assertThat(received.get(1)).isInstanceOfSatisfying(
                OcrStreamEvent.Page.class,
                firstPage -> {
                    assertThat(firstPage.pageNumber()).isEqualTo(0);
                    assertThat(firstPage.blocks()).hasSize(2);
                });

        // Event 2: second page with its single block.
        assertThat(received.get(2)).isInstanceOfSatisfying(
                OcrStreamEvent.Page.class,
                secondPage -> {
                    assertThat(secondPage.pageNumber()).isEqualTo(1);
                    assertThat(secondPage.blocks()).hasSize(1);
                });

        // Event 3: completion summary.
        assertThat(received.get(3)).isInstanceOfSatisfying(
                OcrStreamEvent.Done.class,
                summary -> {
                    assertThat(summary.totalBlocks()).isEqualTo(3);
                    assertThat(summary.skippedPages()).isEqualTo(0);
                });
    }

    /**
     * A client that returns no blocks must still yield exactly two events:
     * a Start event reporting zero pages, followed by a Done event.
     */
    @Test
    void defaultStreamBlocksHandlesEmptyResults() {
        OcrClient client = (pdfUrl, scriptType) -> List.of();

        List<OcrStreamEvent> received = collectEvents(client);

        assertThat(received).hasSize(2);
        assertThat(received.get(0)).isInstanceOfSatisfying(
                OcrStreamEvent.Start.class,
                start -> assertThat(start.totalPages()).isEqualTo(0));
        assertThat(received.get(1)).isInstanceOf(OcrStreamEvent.Done.class);
    }

    /**
     * Runs {@code streamBlocks} on the given client and returns every event
     * passed to the callback, in the order received.
     */
    private static List<OcrStreamEvent> collectEvents(OcrClient client) {
        List<OcrStreamEvent> sink = new ArrayList<>();
        client.streamBlocks(PDF_URL, ScriptType.TYPEWRITER, sink::add);
        return sink;
    }
}