fix(ocr): reduce memory usage for 16GB dev machines
- Surya models lazy-load on first OCR request instead of at startup (saves ~3-4GB idle RAM; Kraken stays eager at ~16MB)
- Process one page at a time in the Surya engine (limits peak memory)
- Set RECOGNITION_BATCH_SIZE=1 and DETECTOR_BATCH_SIZE=1 (slower but fits in RAM)
- Revert mem_limit to 6GB (sufficient with these optimizations)
- Render DPI stays at 200

Idle memory: ~2GB (Kraken only). Peak during OCR: ~5-6GB (Surya loaded).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -78,14 +78,16 @@ services:
|
|||||||
dockerfile: Dockerfile
|
dockerfile: Dockerfile
|
||||||
container_name: archive-ocr
|
container_name: archive-ocr
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
mem_limit: 10g
|
mem_limit: 6g
|
||||||
memswap_limit: 10g
|
memswap_limit: 6g
|
||||||
volumes:
|
volumes:
|
||||||
- ocr_models:/app/models
|
- ocr_models:/app/models
|
||||||
environment:
|
environment:
|
||||||
KRAKEN_MODEL_PATH: /app/models/german_kurrent.mlmodel
|
KRAKEN_MODEL_PATH: /app/models/german_kurrent.mlmodel
|
||||||
OCR_CONFIDENCE_THRESHOLD: "0.3"
|
OCR_CONFIDENCE_THRESHOLD: "0.3"
|
||||||
OCR_CONFIDENCE_THRESHOLD_KURRENT: "0.5"
|
OCR_CONFIDENCE_THRESHOLD_KURRENT: "0.5"
|
||||||
|
RECOGNITION_BATCH_SIZE: "1"
|
||||||
|
DETECTOR_BATCH_SIZE: "1"
|
||||||
networks:
|
networks:
|
||||||
- archive-net
|
- archive-net
|
||||||
healthcheck:
|
healthcheck:
|
||||||
|
|||||||
@@ -6,13 +6,20 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
_recognition_predictor = None
|
_recognition_predictor = None
|
||||||
_detection_predictor = None
|
_detection_predictor = None
|
||||||
|
_loaded = False
|
||||||
|
|
||||||
|
|
||||||
def load_models():
|
def load_models():
|
||||||
"""Eagerly load Surya models into memory. Called once at container startup."""
|
"""Lazy-load Surya models on first use to save RAM at idle.
|
||||||
global _recognition_predictor, _detection_predictor
|
|
||||||
|
|
||||||
logger.info("Loading Surya models...")
|
Called automatically by extract_blocks(). Can also be called explicitly
|
||||||
|
to pre-warm if desired.
|
||||||
|
"""
|
||||||
|
global _recognition_predictor, _detection_predictor, _loaded
|
||||||
|
if _loaded:
|
||||||
|
return
|
||||||
|
|
||||||
|
logger.info("Loading Surya models (lazy, first OCR request)...")
|
||||||
|
|
||||||
from surya.foundation import FoundationPredictor
|
from surya.foundation import FoundationPredictor
|
||||||
from surya.recognition import RecognitionPredictor
|
from surya.recognition import RecognitionPredictor
|
||||||
@@ -21,6 +28,7 @@ def load_models():
|
|||||||
foundation_predictor = FoundationPredictor()
|
foundation_predictor = FoundationPredictor()
|
||||||
_recognition_predictor = RecognitionPredictor(foundation_predictor)
|
_recognition_predictor = RecognitionPredictor(foundation_predictor)
|
||||||
_detection_predictor = DetectionPredictor()
|
_detection_predictor = DetectionPredictor()
|
||||||
|
_loaded = True
|
||||||
|
|
||||||
logger.info("Surya models loaded successfully")
|
logger.info("Surya models loaded successfully")
|
||||||
|
|
||||||
@@ -28,22 +36,25 @@ def load_models():
|
|||||||
def extract_blocks(images: list, language: str = "de") -> list[dict]:
|
def extract_blocks(images: list, language: str = "de") -> list[dict]:
|
||||||
"""Run Surya OCR on a list of PIL images (one per page).
|
"""Run Surya OCR on a list of PIL images (one per page).
|
||||||
|
|
||||||
|
Processes one page at a time to limit peak memory usage.
|
||||||
Returns a flat list of block dicts with pageNumber, x, y, width, height,
|
Returns a flat list of block dicts with pageNumber, x, y, width, height,
|
||||||
polygon, text. Coordinates are normalized to [0, 1] relative to page dimensions.
|
polygon, text, words. Coordinates are normalized to [0, 1].
|
||||||
Surya 0.17+ returns polygon (4-point) natively on each text line.
|
|
||||||
"""
|
"""
|
||||||
|
load_models()
|
||||||
|
|
||||||
all_blocks = []
|
all_blocks = []
|
||||||
|
|
||||||
predictions = _recognition_predictor(images, det_predictor=_detection_predictor)
|
for page_idx, image in enumerate(images):
|
||||||
|
page_w, page_h = image.size
|
||||||
|
|
||||||
for page_idx, page_pred in enumerate(predictions):
|
# Process single page to limit peak memory
|
||||||
page_w, page_h = images[page_idx].size
|
predictions = _recognition_predictor([image], det_predictor=_detection_predictor)
|
||||||
|
page_pred = predictions[0]
|
||||||
|
|
||||||
for line in page_pred.text_lines:
|
for line in page_pred.text_lines:
|
||||||
bbox = line.bbox # [x1, y1, x2, y2] in pixel coordinates
|
bbox = line.bbox
|
||||||
x1, y1, x2, y2 = bbox
|
x1, y1, x2, y2 = bbox
|
||||||
|
|
||||||
# Surya 0.17 provides polygon as list of (x, y) tuples (4 points, clockwise)
|
|
||||||
polygon = None
|
polygon = None
|
||||||
if hasattr(line, "polygon") and line.polygon and len(line.polygon) == 4:
|
if hasattr(line, "polygon") and line.polygon and len(line.polygon) == 4:
|
||||||
polygon = [
|
polygon = [
|
||||||
@@ -51,7 +62,6 @@ def extract_blocks(images: list, language: str = "de") -> list[dict]:
|
|||||||
for p in line.polygon
|
for p in line.polygon
|
||||||
]
|
]
|
||||||
|
|
||||||
# Extract word-level confidence for [unleserlich] marking
|
|
||||||
words = []
|
words = []
|
||||||
if hasattr(line, "words") and line.words:
|
if hasattr(line, "words") and line.words:
|
||||||
for word in line.words:
|
for word in line.words:
|
||||||
@@ -73,4 +83,7 @@ def extract_blocks(images: list, language: str = "de") -> list[dict]:
|
|||||||
"words": words,
|
"words": words,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Drop the loop's reference to the page image (the caller's `images` list still holds it)
|
||||||
|
del image
|
||||||
|
|
||||||
return all_blocks
|
return all_blocks
|
||||||
|
|||||||
@@ -22,14 +22,13 @@ _models_ready = False
|
|||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def lifespan(app: FastAPI):
|
async def lifespan(app: FastAPI):
|
||||||
"""Load all OCR models at startup before accepting requests."""
|
"""Load lightweight models at startup. Surya loads lazily on first request."""
|
||||||
global _models_ready
|
global _models_ready
|
||||||
|
|
||||||
logger.info("Loading OCR models at startup...")
|
logger.info("Loading Kraken model at startup (Surya loads lazily on first OCR request)...")
|
||||||
surya_engine.load_models()
|
|
||||||
kraken_engine.load_models()
|
kraken_engine.load_models()
|
||||||
_models_ready = True
|
_models_ready = True
|
||||||
logger.info("All OCR models loaded — ready to accept requests")
|
logger.info("Startup complete — ready to accept requests")
|
||||||
|
|
||||||
yield
|
yield
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user