feat(ocr): increment ocr_jobs_total with engine and script_type labels

Derive the engine label from the script type (engine="kraken" for
HANDWRITING_KURRENT, engine="surya" otherwise), then increment
ocr_jobs_total after the blocks have been extracted.

Refs #652 (AC2)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-21 16:03:37 +02:00
parent f3e3545d06
commit 696b71da5a
2 changed files with 41 additions and 0 deletions

View File

@@ -106,6 +106,7 @@ async def run_ocr(request: OcrRequest):
del img
script_type = request.scriptType.upper()
engine_name = "kraken" if script_type == "HANDWRITING_KURRENT" else "surya"
if script_type == "HANDWRITING_KURRENT":
if not kraken_engine.is_available():
@@ -119,6 +120,8 @@ async def run_ocr(request: OcrRequest):
# TYPEWRITER, HANDWRITING_LATIN, UNKNOWN — all use Surya
blocks = await asyncio.to_thread(surya_engine.extract_blocks, images, request.language)
metrics.ocr_jobs_total.labels(engine=engine_name, script_type=script_type).inc()
threshold = get_threshold(script_type)
for block in blocks:
if block.get("words"):