fix(ocr): unblock event loop during OCR and show errors in UI
OCR engines are CPU-bound and were blocking Uvicorn's single async event loop, making /health unresponsive during processing. This caused new OCR requests to fail silently (health check failure → no DB record → UI shows NONE).

Wrap engine calls in asyncio.to_thread() to keep the event loop free. Also surface OCR trigger errors in the frontend instead of silently resetting the spinner.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
"""OCR microservice — FastAPI app with Surya and Kraken engine support."""
|
||||
|
||||
+ import asyncio
|
||||
import io
|
||||
import logging
|
||||
from contextlib import asynccontextmanager
|
||||
@@ -52,6 +53,7 @@ async def run_ocr(request: OcrRequest):
|
||||
|
||||
Downloads the PDF from the provided URL, converts pages to images,
|
||||
and runs the appropriate OCR engine based on scriptType.
|
||||
+ OCR engines run in a thread pool so the event loop stays free for /health.
|
||||
"""
|
||||
if not _models_ready:
|
||||
raise HTTPException(status_code=503, detail="Models not loaded yet")
|
||||
@@ -66,10 +68,10 @@ async def run_ocr(request: OcrRequest):
|
||||
status_code=400,
|
||||
detail="Kraken model not available — cannot process Kurrent script",
|
||||
)
|
||||
- blocks = kraken_engine.extract_blocks(images, request.language)
|
||||
+ blocks = await asyncio.to_thread(kraken_engine.extract_blocks, images, request.language)
|
||||
else:
|
||||
# TYPEWRITER, HANDWRITING_LATIN, UNKNOWN — all use Surya
|
||||
- blocks = surya_engine.extract_blocks(images, request.language)
|
||||
+ blocks = await asyncio.to_thread(surya_engine.extract_blocks, images, request.language)
|
||||
|
||||
threshold = get_threshold(script_type)
|
||||
for block in blocks:
|
||||
|
||||
Reference in New Issue
Block a user