feat(ocr): increment ocr_pages_total per successful page in stream

Bumps the ocr_pages_total counter inside both the standard and guided
/ocr/stream generators once a page's blocks have been recognized, just before
the per-page JSON line is emitted. Also increments ocr_jobs_total for
/ocr/stream right after engine selection, so the job is still counted when a
page later errors out.

Refs #652 (AC3a)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-21 16:05:36 +02:00
parent 52d8dc2b20
commit 79edb94558
2 changed files with 37 additions and 0 deletions

View File

@@ -156,6 +156,9 @@ async def run_ocr_stream(request: OcrRequest):
)
engine = kraken_engine if use_kraken else surya_engine
engine_name = "kraken" if use_kraken else "surya"
metrics.ocr_jobs_total.labels(engine=engine_name, script_type=script_type).inc()
if request.regions:
# Guided mode: recognize only the user-drawn annotation regions
@@ -206,6 +209,7 @@ async def run_ocr_stream(request: OcrRequest):
})
total_blocks += len(blocks)
metrics.ocr_pages_total.labels(engine=engine_name).inc()
yield json.dumps({
"type": "page",
"pageNumber": page_idx,
@@ -260,6 +264,7 @@ async def run_ocr_stream(request: OcrRequest):
block["text"] = correct_text(block["text"])
total_blocks += len(blocks)
metrics.ocr_pages_total.labels(engine=engine_name).inc()
yield json.dumps({
"type": "page",
"pageNumber": page_idx,