feat(ocr): add default streamBlocks method to OcrClient interface

The default method synthesizes Start/Page/Done events from
extractBlocks() results, providing backward compatibility for
implementations that don't support streaming natively.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-13 10:01:26 +02:00
parent e21d01e10b
commit 641e91d5a3
2 changed files with 81 additions and 0 deletions

View File

@@ -2,8 +2,34 @@ package org.raddatz.familienarchiv.service;
import org.raddatz.familienarchiv.model.ScriptType;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Objects;
import java.util.function.Consumer;
/**
 * Client for an OCR backend that turns a PDF into {@link OcrBlockResult}s.
 *
 * <p>Results can be obtained in one piece via {@link #extractBlocks} or as a
 * sequence of {@link OcrStreamEvent}s via {@link #streamBlocks}.
 */
public interface OcrClient {

    /**
     * Runs OCR on the given PDF and returns every recognized block in a single list.
     *
     * @param pdfUrl     location of the PDF to process
     * @param scriptType script type to use for recognition
     * @return the recognized blocks
     */
    List<OcrBlockResult> extractBlocks(String pdfUrl, ScriptType scriptType);

    /**
     * Stream OCR results page-by-page via NDJSON. Implementations should override
     * this method. The default exists only for backward compatibility during migration
     * — it calls {@link #extractBlocks} and synthesizes events from the collected result.
     *
     * <p>The default emits, in this order: one {@link OcrStreamEvent.Start}, one
     * {@link OcrStreamEvent.Page} per distinct page number (in the order each page
     * first appears in the extracted list), and one {@link OcrStreamEvent.Done}.
     * Nothing is emitted until {@code extractBlocks()} has returned, so this default
     * offers no incremental progress.
     *
     * @param pdfUrl     location of the PDF to process
     * @param scriptType script type to use for recognition
     * @param handler    receives every synthesized event; must not be {@code null}
     * @throws NullPointerException if {@code handler} is {@code null}
     */
    default void streamBlocks(String pdfUrl, ScriptType scriptType, Consumer<OcrStreamEvent> handler) {
        // Fail fast: without this check a null handler is only detected after the
        // (potentially slow) OCR call has already completed.
        Objects.requireNonNull(handler, "handler");

        List<OcrBlockResult> allBlocks = extractBlocks(pdfUrl, scriptType);

        // LinkedHashMap keeps pages in the order they first occur in the result.
        LinkedHashMap<Integer, List<OcrBlockResult>> byPage = new LinkedHashMap<>();
        for (OcrBlockResult block : allBlocks) {
            byPage.computeIfAbsent(block.pageNumber(), k -> new ArrayList<>()).add(block);
        }

        // Highest page number + 1; yields 0 when there are no blocks.
        // NOTE(review): this assumes page numbers are zero-based — confirm against OcrBlockResult.
        int totalPages = byPage.keySet().stream().mapToInt(Integer::intValue).max().orElse(-1) + 1;

        handler.accept(new OcrStreamEvent.Start(totalPages));
        for (var entry : byPage.entrySet()) {
            handler.accept(new OcrStreamEvent.Page(entry.getKey(), entry.getValue()));
        }
        // First argument is the total block count; the second is always 0 here.
        // NOTE(review): meaning of the second argument is not visible from this file —
        // presumably a skipped/failed counter; confirm against OcrStreamEvent.Done.
        handler.accept(new OcrStreamEvent.Done(allBlocks.size(), 0));
    }
}