Compare commits
4 Commits
669eaa7c65
...
581ba01d8d
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
581ba01d8d | ||
|
|
9db42d6cc1 | ||
|
|
ab24786d2a | ||
|
|
1aca4c4a41 |
@@ -87,8 +87,9 @@ services:
|
|||||||
memswap_limit: 12g
|
memswap_limit: 12g
|
||||||
volumes:
|
volumes:
|
||||||
- ocr_models:/app/models
|
- ocr_models:/app/models
|
||||||
- ocr_cache:/root/.cache # Hugging Face / ketos model download cache — prevents re-downloads on container recreate
|
- ocr_cache:/app/cache
|
||||||
environment:
|
environment:
|
||||||
|
HF_HOME: /app/cache
|
||||||
KRAKEN_MODEL_PATH: /app/models/german_kurrent.mlmodel
|
KRAKEN_MODEL_PATH: /app/models/german_kurrent.mlmodel
|
||||||
TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"
|
TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"
|
||||||
OCR_CONFIDENCE_THRESHOLD: "0.3"
|
OCR_CONFIDENCE_THRESHOLD: "0.3"
|
||||||
@@ -106,6 +107,12 @@ services:
|
|||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 12
|
retries: 12
|
||||||
start_period: 120s
|
start_period: 120s
|
||||||
|
read_only: true
|
||||||
|
tmpfs:
|
||||||
|
- /tmp:size=512m # training endpoints write ZIPs to /tmp; 512 MB covers typical batches (20–50 images)
|
||||||
|
cap_drop: [ALL]
|
||||||
|
security_opt:
|
||||||
|
- no-new-privileges:true
|
||||||
|
|
||||||
# --- Backend: Spring Boot ---
|
# --- Backend: Spring Boot ---
|
||||||
backend:
|
backend:
|
||||||
|
|||||||
@@ -23,8 +23,16 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
|
RUN useradd --no-create-home --shell /usr/sbin/nologin --uid 1000 ocr \
|
||||||
|
&& mkdir -p /home/ocr /app/models /app/cache \
|
||||||
|
&& chown -R ocr:ocr /app /home/ocr
|
||||||
RUN chmod +x /app/entrypoint.sh
|
RUN chmod +x /app/entrypoint.sh
|
||||||
|
|
||||||
|
ENV HOME=/home/ocr
|
||||||
|
ENV HF_HOME=/app/cache
|
||||||
|
|
||||||
|
USER ocr
|
||||||
|
|
||||||
EXPOSE 8000
|
EXPOSE 8000
|
||||||
|
|
||||||
CMD ["/app/entrypoint.sh"]
|
CMD ["/app/entrypoint.sh"]
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ log = logging.getLogger(__name__)
|
|||||||
BLLA_MODEL_PATH = os.environ.get("BLLA_MODEL_PATH", "/app/models/blla.mlmodel")
|
BLLA_MODEL_PATH = os.environ.get("BLLA_MODEL_PATH", "/app/models/blla.mlmodel")
|
||||||
# DOI for "General segmentation model for print and handwriting" — ketos 7 compatible.
|
# DOI for "General segmentation model for print and handwriting" — ketos 7 compatible.
|
||||||
BLLA_MODEL_DOI = "10.5281/zenodo.14602569"
|
BLLA_MODEL_DOI = "10.5281/zenodo.14602569"
|
||||||
HTRMOPO_DIR = os.path.expanduser("~/.local/share/htrmopo")
|
HTRMOPO_DIR = os.environ.get("HTRMOPO_DIR", "/app/models/.htrmopo")
|
||||||
|
|
||||||
|
|
||||||
def _model_is_loadable(path: str) -> bool:
|
def _model_is_loadable(path: str) -> bool:
|
||||||
|
|||||||
@@ -56,6 +56,8 @@ async def lifespan(app: FastAPI):
|
|||||||
"""Load lightweight models at startup. Surya loads lazily on first request."""
|
"""Load lightweight models at startup. Surya loads lazily on first request."""
|
||||||
global _models_ready
|
global _models_ready
|
||||||
|
|
||||||
|
if os.getuid() == 0:
|
||||||
|
logger.warning("Running as root — CIS Docker §4.1 violation")
|
||||||
logger.info("Loading Kraken model at startup (Surya loads lazily on first OCR request)...")
|
logger.info("Loading Kraken model at startup (Surya loads lazily on first OCR request)...")
|
||||||
kraken_engine.load_models()
|
kraken_engine.load_models()
|
||||||
load_spell_checker()
|
load_spell_checker()
|
||||||
|
|||||||
@@ -1,10 +1,35 @@
|
|||||||
"""Unit tests for ensure_blla_model.main()."""
|
"""Unit tests for ensure_blla_model.main()."""
|
||||||
|
|
||||||
|
import importlib
|
||||||
|
import os
|
||||||
from unittest.mock import MagicMock, call, patch
|
from unittest.mock import MagicMock, call, patch
|
||||||
|
|
||||||
import ensure_blla_model
|
import ensure_blla_model
|
||||||
|
|
||||||
|
|
||||||
|
# ─── HTRMOPO_DIR env var resolution ──────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_htrmopo_dir_reads_from_env_var():
|
||||||
|
"""HTRMOPO_DIR uses the HTRMOPO_DIR env var when set, not ~ expansion."""
|
||||||
|
with patch.dict(os.environ, {"HTRMOPO_DIR": "/custom/htrmopo"}):
|
||||||
|
importlib.reload(ensure_blla_model)
|
||||||
|
result = ensure_blla_model.HTRMOPO_DIR
|
||||||
|
importlib.reload(ensure_blla_model)
|
||||||
|
assert result == "/custom/htrmopo"
|
||||||
|
|
||||||
|
|
||||||
|
def test_htrmopo_dir_default_is_fixed_path():
|
||||||
|
"""Default HTRMOPO_DIR is a fixed path not derived from ~ (no-create-home safe)."""
|
||||||
|
clean_env = {k: v for k, v in os.environ.items() if k != "HTRMOPO_DIR"}
|
||||||
|
with patch.dict(os.environ, clean_env, clear=True):
|
||||||
|
importlib.reload(ensure_blla_model)
|
||||||
|
result = ensure_blla_model.HTRMOPO_DIR
|
||||||
|
importlib.reload(ensure_blla_model)
|
||||||
|
assert "~" not in result
|
||||||
|
assert not result.startswith("/.")
|
||||||
|
|
||||||
|
|
||||||
# ─── Model already loadable ───────────────────────────────────────────────────
|
# ─── Model already loadable ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user