diff --git a/ocr-service/test_tmpdir.py b/ocr-service/test_tmpdir.py
index 1d40433d..835e8c07 100644
--- a/ocr-service/test_tmpdir.py
+++ b/ocr-service/test_tmpdir.py
@@ -3,6 +3,7 @@
 import os
 import subprocess
 import tempfile
+import time
 
 import pytest
 
@@ -12,6 +13,22 @@ from utils import _validate_zip_entry
 _ENTRYPOINT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "entrypoint.sh")
 
 
+def _run_entrypoint(tmpdir, tmp_path):
+    """Run entrypoint.sh with TMPDIR set to tmpdir; python3/uvicorn are stubbed out."""
+    stub_bin = tmp_path / "stub_bin"
+    stub_bin.mkdir(exist_ok=True)
+    for name in ("python3", "uvicorn"):
+        stub = stub_bin / name
+        stub.write_text("#!/bin/sh\nexit 0\n")
+        stub.chmod(0o755)
+    env = {
+        **os.environ,
+        "TMPDIR": str(tmpdir),
+        "PATH": f"{stub_bin}:{os.environ.get('PATH', '/usr/bin:/bin')}",
+    }
+    return subprocess.run(["bash", _ENTRYPOINT], env=env, capture_output=True, text=True)
+
+
 def test_tempfile_uses_tmpdir_when_set(monkeypatch, tmp_path):
     """Python honours the TMPDIR env var when creating temporary directories.
 
@@ -82,6 +99,43 @@ def test_tmpdir_is_inside_persistent_cache_volume():
     )
 
 
+def test_entrypoint_removes_day_old_orphans(tmp_path):
+    """entrypoint.sh deletes partial downloads older than 1 day from TMPDIR.
+
+    Simulates a file left behind by a docker-kill mid-download: backdate its
+    mtime by 2 days using os.utime(), run the entrypoint, assert it is gone.
+    See ADR-021.
+    """
+    staging = tmp_path / "staging"
+    staging.mkdir()
+    stale_file = staging / "model.safetensors.partial"
+    stale_file.write_bytes(b"partial download")
+    two_days_ago = time.time() - 2 * 24 * 3600
+    os.utime(stale_file, (two_days_ago, two_days_ago))
+
+    result = _run_entrypoint(staging, tmp_path)
+    assert result.returncode == 0, f"entrypoint.sh exited {result.returncode}\nstderr: {result.stderr}"
+    assert not stale_file.exists(), "day-old orphan should have been deleted by entrypoint.sh"
+
+
+def test_entrypoint_preserves_fresh_files(tmp_path):
+    """entrypoint.sh does not delete files newer than 1 day from TMPDIR.
+
+    An in-progress download whose mtime is recent must survive the orphan
+    cleanup so a concurrent or just-started model fetch is not interrupted.
+    See ADR-021.
+    """
+    staging = tmp_path / "staging"
+    staging.mkdir()
+    fresh_file = staging / "model.safetensors.part"
+    fresh_file.write_bytes(b"in progress")
+    # mtime is now — no os.utime() call needed
+
+    result = _run_entrypoint(staging, tmp_path)
+    assert result.returncode == 0, f"entrypoint.sh exited {result.returncode}\nstderr: {result.stderr}"
+    assert fresh_file.exists(), "recent file should not have been deleted by entrypoint.sh"
+
+
 def test_zipslip_still_anchors_under_custom_tmpdir(tmp_path):
     """_validate_zip_entry rejects path-traversal when extract_dir is under a custom TMPDIR.
 