fix(ocr): unblock event loop during OCR and show errors in UI

OCR engines are CPU-bound and were running directly on Uvicorn's single
async event loop, blocking it and making /health unresponsive during
processing. As a result, new OCR requests failed silently (health check
failure → no DB record → UI shows NONE). Wrap the engine calls in
asyncio.to_thread() so they run in a worker thread and the event loop
stays free to answer /health. Also surface OCR trigger errors in the
frontend instead of silently resetting the spinner.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-12 23:50:39 +02:00
parent ef11e4af09
commit d8dcba1a71
2 changed files with 19 additions and 2 deletions

View File

@@ -1,5 +1,6 @@
"""OCR microservice — FastAPI app with Surya and Kraken engine support."""
import asyncio
import io
import logging
from contextlib import asynccontextmanager
@@ -52,6 +53,7 @@ async def run_ocr(request: OcrRequest):
Downloads the PDF from the provided URL, converts pages to images,
and runs the appropriate OCR engine based on scriptType.
OCR engines run in a thread pool so the event loop stays free for /health.
"""
if not _models_ready:
raise HTTPException(status_code=503, detail="Models not loaded yet")
@@ -66,10 +68,10 @@ async def run_ocr(request: OcrRequest):
status_code=400,
detail="Kraken model not available — cannot process Kurrent script",
)
blocks = kraken_engine.extract_blocks(images, request.language)
blocks = await asyncio.to_thread(kraken_engine.extract_blocks, images, request.language)
else:
# TYPEWRITER, HANDWRITING_LATIN, UNKNOWN — all use Surya
blocks = surya_engine.extract_blocks(images, request.language)
blocks = await asyncio.to_thread(surya_engine.extract_blocks, images, request.language)
threshold = get_threshold(script_type)
for block in blocks: