From e8375d6c72a947b868bbeaee05f618dbfb65e86d Mon Sep 17 00:00:00 2001 From: Marcel Date: Tue, 14 Apr 2026 13:06:12 +0200 Subject: [PATCH] fix(ocr-service): add entrypoint that validates blla model format on startup Adds ensure_blla_model.py which loads the blla segmentation model with ketos on every container start. If the model is missing or in the legacy PyTorch ZIP format (incompatible with ketos 7), it re-downloads the correct CoreML protobuf model from Zenodo (DOI 10.5281/zenodo.14602569). The Dockerfile now uses entrypoint.sh which runs this check before starting uvicorn. Co-Authored-By: Claude Sonnet 4.6 --- ocr-service/Dockerfile | 4 +- ocr-service/ensure_blla_model.py | 77 ++++++++++++++++++++++++++++++++ ocr-service/entrypoint.sh | 9 ++++ 3 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 ocr-service/ensure_blla_model.py create mode 100644 ocr-service/entrypoint.sh diff --git a/ocr-service/Dockerfile b/ocr-service/Dockerfile index 01b8ebfa..a8ec48df 100644 --- a/ocr-service/Dockerfile +++ b/ocr-service/Dockerfile @@ -21,6 +21,8 @@ RUN pip install --no-cache-dir -r requirements.txt COPY . . +RUN chmod +x /app/entrypoint.sh + EXPOSE 8000 -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "1"] +CMD ["/app/entrypoint.sh"] diff --git a/ocr-service/ensure_blla_model.py b/ocr-service/ensure_blla_model.py new file mode 100644 index 00000000..a8aed449 --- /dev/null +++ b/ocr-service/ensure_blla_model.py @@ -0,0 +1,77 @@ +"""Validates the blla segmentation base model and downloads it if needed. + +Run at container startup before uvicorn. ketos 7 requires the model in +CoreML protobuf or safetensors format — legacy PyTorch ZIP archives +(torch.save output from kraken <4) are not loadable and will be replaced. + +Exits non-zero on failure so Docker marks the container unhealthy rather +than silently starting with a broken model. 
+""" + +import glob +import logging +import os +import shutil +import subprocess +import sys + +logging.basicConfig( + level=logging.INFO, + format="%(levelname)s:ensure_blla_model:%(message)s", +) +log = logging.getLogger(__name__) + +BLLA_MODEL_PATH = os.environ.get("BLLA_MODEL_PATH", "/app/models/blla.mlmodel") +# DOI for "General segmentation model for print and handwriting" — ketos 7 compatible. +BLLA_MODEL_DOI = "10.5281/zenodo.14602569" +HTRMOPO_DIR = os.path.expanduser("~/.local/share/htrmopo") + + +def _model_is_loadable(path: str) -> bool: + try: + from kraken.lib import vgsl + + vgsl.TorchVGSLModel.load_model(path) + return True + except Exception as e: + log.warning("Model at %s failed to load: %s", path, e) + return False + + +def _download_blla() -> str: + log.info("Downloading blla model (DOI %s) ...", BLLA_MODEL_DOI) + result = subprocess.run( + ["kraken", "get", BLLA_MODEL_DOI], + capture_output=True, + text=True, + ) + if result.returncode != 0: + log.error("kraken get failed: %s", result.stderr) + sys.exit(1) + + candidates = sorted(glob.glob(os.path.join(HTRMOPO_DIR, "*/blla.mlmodel"))) + if not candidates: + log.error("Downloaded blla.mlmodel not found under %s", HTRMOPO_DIR) + sys.exit(1) + + return candidates[-1] + + +def main() -> None: + if os.path.exists(BLLA_MODEL_PATH): + if _model_is_loadable(BLLA_MODEL_PATH): + log.info("blla model OK: %s", BLLA_MODEL_PATH) + return + log.warning( + "blla model at %s is in an incompatible format — replacing", BLLA_MODEL_PATH + ) + os.rename(BLLA_MODEL_PATH, BLLA_MODEL_PATH + ".incompatible") + + os.makedirs(os.path.dirname(BLLA_MODEL_PATH), exist_ok=True) + downloaded = _download_blla() + shutil.copy2(downloaded, BLLA_MODEL_PATH) + log.info("Installed blla model at %s", BLLA_MODEL_PATH) + + +if __name__ == "__main__": + main() diff --git a/ocr-service/entrypoint.sh b/ocr-service/entrypoint.sh new file mode 100644 index 00000000..ec6892a8 --- /dev/null +++ b/ocr-service/entrypoint.sh @@ -0,0 +1,9 @@ 
#!/bin/bash
# Abort startup on any error, unset variable, or pipeline failure.
set -euo pipefail

# Validate the blla segmentation base model and download it if missing or
# incompatible. ketos 7 dropped support for legacy PyTorch ZIP archives —
# this ensures the volume always holds a loadable CoreML protobuf model.
python3 /app/ensure_blla_model.py

# exec replaces the shell so SIGTERM from Docker reaches uvicorn directly.
exec uvicorn main:app --host 0.0.0.0 --port 8000 --workers 1