diff --git a/docker-compose.yml b/docker-compose.yml index 72a70e13..3d194e75 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -78,14 +78,16 @@ services: dockerfile: Dockerfile container_name: archive-ocr restart: unless-stopped - mem_limit: 10g - memswap_limit: 10g + mem_limit: 6g + memswap_limit: 6g volumes: - ocr_models:/app/models environment: KRAKEN_MODEL_PATH: /app/models/german_kurrent.mlmodel OCR_CONFIDENCE_THRESHOLD: "0.3" OCR_CONFIDENCE_THRESHOLD_KURRENT: "0.5" + RECOGNITION_BATCH_SIZE: "1" + DETECTOR_BATCH_SIZE: "1" networks: - archive-net healthcheck: diff --git a/ocr-service/engines/surya.py b/ocr-service/engines/surya.py index 94fc330b..ea028d8c 100644 --- a/ocr-service/engines/surya.py +++ b/ocr-service/engines/surya.py @@ -6,13 +6,20 @@ logger = logging.getLogger(__name__) _recognition_predictor = None _detection_predictor = None +_loaded = False def load_models(): - """Eagerly load Surya models into memory. Called once at container startup.""" - global _recognition_predictor, _detection_predictor + """Lazy-load Surya models on first use to save RAM at idle. - logger.info("Loading Surya models...") + Called automatically by extract_blocks(). Can also be called explicitly + to pre-warm if desired. + """ + global _recognition_predictor, _detection_predictor, _loaded + if _loaded: + return + + logger.info("Loading Surya models (lazy, first OCR request)...") from surya.foundation import FoundationPredictor from surya.recognition import RecognitionPredictor @@ -21,6 +28,7 @@ def load_models(): foundation_predictor = FoundationPredictor() _recognition_predictor = RecognitionPredictor(foundation_predictor) _detection_predictor = DetectionPredictor() + _loaded = True logger.info("Surya models loaded successfully") @@ -28,22 +36,25 @@ def load_models(): def extract_blocks(images: list, language: str = "de") -> list[dict]: """Run Surya OCR on a list of PIL images (one per page). + Processes one page at a time to limit peak memory usage. Returns a flat list of block dicts with pageNumber, x, y, width, height, - polygon, text. Coordinates are normalized to [0, 1] relative to page dimensions. - Surya 0.17+ returns polygon (4-point) natively on each text line. + polygon, text, words. Coordinates are normalized to [0, 1]. """ + load_models() + all_blocks = [] - predictions = _recognition_predictor(images, det_predictor=_detection_predictor) + for page_idx, image in enumerate(images): + page_w, page_h = image.size - for page_idx, page_pred in enumerate(predictions): - page_w, page_h = images[page_idx].size + # Process single page to limit peak memory + predictions = _recognition_predictor([image], det_predictor=_detection_predictor) + page_pred = predictions[0] for line in page_pred.text_lines: - bbox = line.bbox # [x1, y1, x2, y2] in pixel coordinates + bbox = line.bbox x1, y1, x2, y2 = bbox - # Surya 0.17 provides polygon as list of (x, y) tuples (4 points, clockwise) polygon = None if hasattr(line, "polygon") and line.polygon and len(line.polygon) == 4: polygon = [ @@ -51,7 +62,6 @@ def extract_blocks(images: list, language: str = "de") -> list[dict]: for p in line.polygon ] - # Extract word-level confidence for [unleserlich] marking words = [] if hasattr(line, "words") and line.words: for word in line.words: @@ -73,4 +83,7 @@ def extract_blocks(images: list, language: str = "de") -> list[dict]: "words": words, }) + # Free page image after processing + del image + return all_blocks diff --git a/ocr-service/main.py b/ocr-service/main.py index ee8d9935..73dbef28 100644 --- a/ocr-service/main.py +++ b/ocr-service/main.py @@ -22,14 +22,13 @@ _models_ready = False @asynccontextmanager async def lifespan(app: FastAPI): - """Load all OCR models at startup before accepting requests.""" + """Load lightweight models at startup. Surya loads lazily on first request.""" global _models_ready - logger.info("Loading OCR models at startup...") - surya_engine.load_models() + logger.info("Loading Kraken model at startup (Surya loads lazily on first OCR request)...") kraken_engine.load_models() _models_ready = True - logger.info("All OCR models loaded — ready to accept requests") + logger.info("Startup complete — ready to accept requests") yield