Compare commits
4 Commits
669eaa7c65
...
581ba01d8d
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
581ba01d8d | ||
|
|
9db42d6cc1 | ||
|
|
ab24786d2a | ||
|
|
1aca4c4a41 |
@@ -87,8 +87,9 @@ services:
|
||||
memswap_limit: 12g
|
||||
volumes:
|
||||
- ocr_models:/app/models
|
||||
- ocr_cache:/root/.cache # Hugging Face / ketos model download cache — prevents re-downloads on container recreate
|
||||
- ocr_cache:/app/cache
|
||||
environment:
|
||||
HF_HOME: /app/cache
|
||||
KRAKEN_MODEL_PATH: /app/models/german_kurrent.mlmodel
|
||||
TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"
|
||||
OCR_CONFIDENCE_THRESHOLD: "0.3"
|
||||
@@ -106,6 +107,12 @@ services:
|
||||
timeout: 5s
|
||||
retries: 12
|
||||
start_period: 120s
|
||||
read_only: true
|
||||
tmpfs:
|
||||
- /tmp:size=512m # training endpoints write ZIPs to /tmp; 512 MB covers typical batches (20–50 images)
|
||||
cap_drop: [ALL]
|
||||
security_opt:
|
||||
- no-new-privileges:true
|
||||
|
||||
# --- Backend: Spring Boot ---
|
||||
backend:
|
||||
|
||||
@@ -23,8 +23,16 @@ RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN useradd --no-create-home --shell /usr/sbin/nologin --uid 1000 ocr \
|
||||
&& mkdir -p /home/ocr /app/models /app/cache \
|
||||
&& chown -R ocr:ocr /app /home/ocr
|
||||
RUN chmod +x /app/entrypoint.sh
|
||||
|
||||
ENV HOME=/home/ocr
|
||||
ENV HF_HOME=/app/cache
|
||||
|
||||
USER ocr
|
||||
|
||||
EXPOSE 8000
|
||||
|
||||
CMD ["/app/entrypoint.sh"]
|
||||
|
||||
@@ -24,7 +24,7 @@ log = logging.getLogger(__name__)
|
||||
BLLA_MODEL_PATH = os.environ.get("BLLA_MODEL_PATH", "/app/models/blla.mlmodel")
|
||||
# DOI for "General segmentation model for print and handwriting" — ketos 7 compatible.
|
||||
BLLA_MODEL_DOI = "10.5281/zenodo.14602569"
|
||||
HTRMOPO_DIR = os.path.expanduser("~/.local/share/htrmopo")
|
||||
HTRMOPO_DIR = os.environ.get("HTRMOPO_DIR", "/app/models/.htrmopo")
|
||||
|
||||
|
||||
def _model_is_loadable(path: str) -> bool:
|
||||
|
||||
@@ -56,6 +56,8 @@ async def lifespan(app: FastAPI):
|
||||
"""Load lightweight models at startup. Surya loads lazily on first request."""
|
||||
global _models_ready
|
||||
|
||||
if os.getuid() == 0:
|
||||
logger.warning("Running as root — CIS Docker §4.1 violation")
|
||||
logger.info("Loading Kraken model at startup (Surya loads lazily on first OCR request)...")
|
||||
kraken_engine.load_models()
|
||||
load_spell_checker()
|
||||
|
||||
@@ -1,10 +1,35 @@
|
||||
"""Unit tests for ensure_blla_model.main()."""
|
||||
|
||||
import importlib
|
||||
import os
|
||||
from unittest.mock import MagicMock, call, patch
|
||||
|
||||
import ensure_blla_model
|
||||
|
||||
|
||||
# ─── HTRMOPO_DIR env var resolution ──────────────────────────────────────────
|
||||
|
||||
|
||||
def test_htrmopo_dir_reads_from_env_var():
    """HTRMOPO_DIR uses the HTRMOPO_DIR env var when set, not ~ expansion."""
    try:
        with patch.dict(os.environ, {"HTRMOPO_DIR": "/custom/htrmopo"}):
            # Reload so the module-level HTRMOPO_DIR is recomputed while the
            # patched environment is active.
            importlib.reload(ensure_blla_model)
            result = ensure_blla_model.HTRMOPO_DIR
    finally:
        # Reload again only after patch.dict has restored os.environ, and do
        # so even if the reload above raised, so later tests never observe a
        # module configured from the patched environment.
        importlib.reload(ensure_blla_model)

    assert result == "/custom/htrmopo"
|
||||
|
||||
|
||||
def test_htrmopo_dir_default_is_fixed_path():
    """Default HTRMOPO_DIR is a fixed path not derived from ~ (no-create-home safe)."""
    # Copy of the current environment with HTRMOPO_DIR removed, so the module
    # falls back to its built-in default on reload.
    clean_env = {k: v for k, v in os.environ.items() if k != "HTRMOPO_DIR"}
    try:
        with patch.dict(os.environ, clean_env, clear=True):
            importlib.reload(ensure_blla_model)
            result = ensure_blla_model.HTRMOPO_DIR
    finally:
        # Reload again only after patch.dict has restored os.environ, and do
        # so even if the reload above raised, so later tests see the module
        # configured from the real environment.
        importlib.reload(ensure_blla_model)

    # A literal "~" would mean the path was never expanded.
    assert "~" not in result
    # A leading "/." is the shape a home-relative dot-directory takes when the
    # home directory resolves to "/".
    assert not result.startswith("/.")
|
||||
|
||||
|
||||
# ─── Model already loadable ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user