fix(ocr): create TMPDIR on startup and clear day-old orphans
On a fresh ocr_cache volume /app/cache/.tmp does not exist yet. The mkdir ensures the first Surya model download can proceed without ENOSPC on the 512 MB /tmp tmpfs. The find cleanup removes fragments left by docker-kill mid-download, preventing cross-job ground-truth leakage. Fixes #614. See ADR-021. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,13 @@
|
||||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
|
||||
# Ensure TMPDIR exists on the persistent cache volume (created by the volume-init
# container, but guaranteed here for fresh volumes and bare docker-run usage).
# Orphaned fragments from prior docker-kill during model downloads are cleared
# on startup to prevent cross-job ground-truth leakage (Surya staging files).
mkdir -p "${TMPDIR:-/tmp}"
# find truncates a file's age to whole days before comparing, so "-mtime +1"
# would only match entries that are at least 48 hours old. "-mtime +0" matches
# everything older than 24 hours, which is the intended day-old threshold.
# The cleanup is best-effort: errors are silenced and the exit status ignored
# so a failed delete cannot abort startup under `set -e`.
find "${TMPDIR:-/tmp}" -mindepth 1 -mtime +0 -delete 2>/dev/null || true
|
||||
|
||||
# Validate the blla segmentation base model and download it if missing or
|
||||
# incompatible. ketos 7 dropped support for legacy PyTorch ZIP archives —
|
||||
# this ensures the volume always holds a loadable CoreML protobuf model.
|
||||
|
||||
97
ocr-service/test_tmpdir.py
Normal file
97
ocr-service/test_tmpdir.py
Normal file
@@ -0,0 +1,97 @@
|
||||
"""Tests for TMPDIR configuration and entrypoint mkdir behavior — ADR-021."""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
|
||||
try:
|
||||
from fastapi import HTTPException
|
||||
from main import _validate_zip_entry
|
||||
HAS_MAIN = True
|
||||
except ImportError:
|
||||
HAS_MAIN = False
|
||||
|
||||
# Absolute path to entrypoint.sh, resolved relative to this test module so the
# tests work regardless of the current working directory.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
_ENTRYPOINT = os.path.join(_THIS_DIR, "entrypoint.sh")
|
||||
|
||||
|
||||
def test_tempfile_uses_tmpdir_when_set(monkeypatch, tmp_path):
    """tempfile creates new temporary directories under $TMPDIR.

    This is the mechanism ADR-021 relies on to route Surya model staging to
    the persistent cache volume instead of the 512 MB RAM tmpfs.
    """
    staging_root = tmp_path / "model_staging"
    staging_root.mkdir()
    expected_prefix = str(staging_root)

    monkeypatch.setenv("TMPDIR", expected_prefix)
    # tempfile caches the directory it resolved in the module-level `tempdir`;
    # resetting it to None makes the next call consult the environment again.
    monkeypatch.setattr(tempfile, "tempdir", None)

    with tempfile.TemporaryDirectory() as created:
        assert created.startswith(expected_prefix)
|
||||
|
||||
|
||||
def test_entrypoint_creates_tmpdir(tmp_path):
    """Running entrypoint.sh creates the TMPDIR directory if it is missing.

    A fresh ocr_cache volume has no /app/cache/.tmp. The entrypoint has to
    create it before uvicorn starts; otherwise the first Surya model download
    exhausts the 512 MB /tmp tmpfs (ENOSPC). See ADR-021.
    """
    staging_dir = tmp_path / "model-staging"
    assert not staging_dir.exists(), "pre-condition: directory must not exist yet"

    # No-op stand-ins placed first on PATH so the script's `python3` and
    # `uvicorn` invocations return immediately instead of doing real work.
    fake_bin = tmp_path / "stub_bin"
    fake_bin.mkdir()
    for command in ("python3", "uvicorn"):
        script = fake_bin / command
        script.write_text("#!/bin/sh\nexit 0\n")
        script.chmod(0o755)

    inherited_path = os.environ.get('PATH', '/usr/bin:/bin')
    child_env = dict(os.environ)
    child_env["TMPDIR"] = str(staging_dir)
    child_env["PATH"] = f"{fake_bin}:{inherited_path}"

    completed = subprocess.run(
        ["bash", _ENTRYPOINT],
        env=child_env,
        capture_output=True,
        text=True,
    )

    # Only directory creation is under test; the script's exit status is
    # deliberately not checked, but its output is surfaced on failure.
    failure_detail = (
        f"entrypoint.sh did not create TMPDIR={staging_dir}\n"
        f"stdout: {completed.stdout}\nstderr: {completed.stderr}"
    )
    assert staging_dir.exists(), failure_detail
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    # Key the skip on the cache mount being present, NOT on TMPDIR itself.
    # Skipping whenever TMPDIR is outside /app/cache would make the assertion
    # below impossible to fail: a reversion to /tmp would be skipped, not caught.
    # NOTE(review): assumes /app/cache exists only inside the OCR container.
    not os.path.isdir("/app/cache"),
    reason="TMPDIR contract only enforced inside the OCR Docker container",
)
def test_tmpdir_is_inside_persistent_cache_volume():
    """TMPDIR must point to the persistent cache volume, not a RAM tmpfs.

    Catches accidental reversion to /tmp (or an unset TMPDIR) and any other
    path outside /app/cache. Runs only where the /app/cache directory exists.
    See ADR-021.
    """
    tmpdir = os.environ.get("TMPDIR", "")
    # Normalise so "/app/cache/../tmp" is rejected, and compare on a path
    # boundary so a sibling such as "/app/cache-old" does not pass.
    normalized = os.path.normpath(tmpdir) if tmpdir else ""
    inside_cache = normalized == "/app/cache" or normalized.startswith("/app/cache/")
    assert inside_cache, (
        f"TMPDIR={tmpdir!r} must be under /app/cache to route model downloads "
        "to the SSD-backed cache volume — see ADR-021"
    )
|
||||
|
||||
|
||||
@pytest.mark.skipif(not HAS_MAIN, reason="requires full ML stack (not available in CI)")
def test_zipslip_still_anchors_under_custom_tmpdir(tmp_path):
    """Path-traversal entries are rejected when extracting under a custom TMPDIR.

    With TMPDIR=/app/cache/.tmp the extraction directories live below that
    path; this checks that _validate_zip_entry still refuses an entry that
    escapes an extraction directory located under a non-default base.
    """
    staging_root = tmp_path / "model-staging"
    extract_dir = staging_root / "tmpXXX"
    extract_dir.mkdir(parents=True)

    with pytest.raises(HTTPException) as rejected:
        _validate_zip_entry("../evil.py", str(extract_dir))

    # Checked after the context manager exits: statements following the
    # raising call inside the `with` body would never run.
    assert rejected.value.status_code == 400
|
||||
Reference in New Issue
Block a user