fix(ocr): reduce memory usage for 16GB dev machines

- Surya models lazy-load on first OCR request instead of at startup (saves ~3-4GB idle RAM — Kraken stays eager at ~16MB)
- Process one page at a time in Surya engine (limits peak memory)
- RECOGNITION_BATCH_SIZE=1, DETECTOR_BATCH_SIZE=1 (slower but fits in RAM)
- Revert mem_limit back to 6GB (sufficient with these optimizations)
- Render DPI stays at 200

Idle memory: ~2GB (Kraken only). Peak during OCR: ~5-6GB (Surya loaded).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-12 22:26:50 +02:00
parent 7f78bc9cf4
commit 902d423f3c
3 changed files with 31 additions and 17 deletions
--- a/ocr-service/main.py
+++ b/ocr-service/main.py
@@ -22,14 +22,13 @@ _models_ready = False

@asynccontextmanager
 async def lifespan(app: FastAPI):
-    """Load all OCR models at startup before accepting requests."""
+    """Load lightweight models at startup. Surya loads lazily on first request."""
    global _models_ready

-    logger.info("Loading OCR models at startup...")
-    surya_engine.load_models()
+    logger.info("Loading Kraken model at startup (Surya loads lazily on first OCR request)...")
    kraken_engine.load_models()
    _models_ready = True
-    logger.info("All OCR models loaded — ready to accept requests")
+    logger.info("Startup complete — ready to accept requests")

    yield