feat: OCR pipeline with NDJSON streaming and real-time progress (#226, #227, #231) #229

Merged
marcel merged 74 commits from feat/issue-226-227-ocr-pipeline-polygon into main 2026-04-13 12:39:04 +02:00
2 changed files with 20 additions and 0 deletions
Showing only changes of commit 9282e46a02 - Show all commits

View File

@@ -1,7 +1,10 @@
package org.raddatz.familienarchiv.service;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import java.util.List;
@JsonIgnoreProperties(ignoreUnknown = true)
public record OcrBlockResult(
int pageNumber,
double x,

View File

@@ -98,6 +98,23 @@ class RestClientOcrClientStreamTest {
assertThat(events).hasSize(2);
}
/**
 * Feeds a three-line NDJSON stream (start, page, done) to the parser, where the
 * single block of the page event carries an extra {@code newFutureField} key.
 * Expects all three events to be delivered and the block's text to be readable.
 */
@Test
void parseNdjsonStream_handlesUnknownFieldsInBlocks() {
    // The block object in the "page" line contains a key ("newFutureField")
    // in addition to the geometry, text and confidence keys.
    final String payload = """
            {"type":"start","totalPages":1}
            {"type":"page","pageNumber":0,"blocks":[{"pageNumber":0,"x":0.1,"y":0.2,"width":0.8,"height":0.1,"polygon":null,"text":"Line 1","confidence":0.95,"newFutureField":"ignored"}]}
            {"type":"done","totalBlocks":1,"skippedPages":0}
            """;
    final byte[] utf8Bytes = payload.getBytes(StandardCharsets.UTF_8);
    final List<OcrStreamEvent> received = new ArrayList<>();
    final InputStream source = new ByteArrayInputStream(utf8Bytes);

    RestClientOcrClient.parseNdjsonStream(source, received::add);

    // One event per NDJSON line.
    assertThat(received).hasSize(3);

    // The second event (index 1) is the page event holding the block.
    final OcrStreamEvent.Page pageEvent = (OcrStreamEvent.Page) received.get(1);
    var firstBlockText = pageEvent.blocks().get(0).text();
    assertThat(firstBlockText).isEqualTo("Line 1");
}
@Test
void parseNdjsonStream_parsesPageWithPolygon() {
String ndjson = """