test(ocr): add orphan cleanup behavior tests for entrypoint.sh find -mtime

test_entrypoint_removes_day_old_orphans and test_entrypoint_preserves_fresh_files
verify the find -mtime +1 -delete logic using os.utime() to fabricate old mtimes
without mocking system time. Also extracts _run_entrypoint helper to remove
subprocess setup duplication.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-18 11:19:33 +02:00
parent e31dac5c9c
commit 775b5c062e

View File

@@ -3,6 +3,7 @@
import os
import subprocess
import tempfile
import time
import pytest
@@ -12,6 +13,22 @@ from utils import _validate_zip_entry
# Absolute path to the entrypoint.sh script located in the same directory as this test module.
_ENTRYPOINT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "entrypoint.sh")
def _run_entrypoint(tmpdir, tmp_path):
    """Execute entrypoint.sh under bash with TMPDIR redirected and stub tools on PATH.

    Args:
        tmpdir: Directory exported to the script as the ``TMPDIR`` variable.
        tmp_path: Scratch directory in which a ``stub_bin`` folder holding
            fake ``python3`` and ``uvicorn`` executables is created.

    Returns:
        The ``subprocess.CompletedProcess`` of the bash run, with stdout and
        stderr captured as text.
    """
    fake_bin = tmp_path / "stub_bin"
    fake_bin.mkdir(exist_ok=True)
    # Each stub is a shell script that exits 0 immediately, so any python3 or
    # uvicorn lookup made through PATH resolves to a harmless no-op.
    for tool in ("python3", "uvicorn"):
        script = fake_bin / tool
        script.write_text("#!/bin/sh\nexit 0\n")
        script.chmod(0o755)

    # Start from the current environment, then override only what the test controls.
    inherited_path = os.environ.get("PATH", "/usr/bin:/bin")
    child_env = dict(os.environ)
    child_env["TMPDIR"] = str(tmpdir)
    # The stub directory goes first so it shadows any real binaries.
    child_env["PATH"] = f"{fake_bin}:{inherited_path}"

    return subprocess.run(
        ["bash", _ENTRYPOINT],
        env=child_env,
        capture_output=True,
        text=True,
    )
def test_tempfile_uses_tmpdir_when_set(monkeypatch, tmp_path):
"""Python honours the TMPDIR env var when creating temporary directories.
@@ -82,6 +99,43 @@ def test_tmpdir_is_inside_persistent_cache_volume():
)
def test_entrypoint_removes_day_old_orphans(tmp_path):
    """entrypoint.sh removes stale partial downloads from TMPDIR.

    Recreates the leftovers of a download that was killed part-way through:
    a file is written into the staging directory, its access and modification
    times are pushed two days into the past with os.utime(), and the
    entrypoint is run. The file must be gone afterwards.

    See ADR-021.
    """
    staging_dir = tmp_path / "staging"
    staging_dir.mkdir()
    orphan = staging_dir / "model.safetensors.partial"
    orphan.write_bytes(b"partial download")

    # Fabricate an old mtime rather than mocking the system clock.
    backdated = time.time() - 2 * 24 * 3600
    os.utime(orphan, (backdated, backdated))

    outcome = _run_entrypoint(staging_dir, tmp_path)

    assert outcome.returncode == 0, f"entrypoint.sh exited {outcome.returncode}\nstderr: {outcome.stderr}"
    assert not orphan.exists(), "day-old orphan should have been deleted by entrypoint.sh"
def test_entrypoint_preserves_fresh_files(tmp_path):
    """entrypoint.sh leaves recently modified files in TMPDIR untouched.

    A file with a current mtime stands in for a download that is still in
    progress (or has only just started); the orphan cleanup must not remove
    it, otherwise an active model fetch would be interrupted.

    See ADR-021.
    """
    staging_dir = tmp_path / "staging"
    staging_dir.mkdir()
    in_flight = staging_dir / "model.safetensors.part"
    # A freshly written file already carries a current mtime, so no
    # os.utime() adjustment is required here.
    in_flight.write_bytes(b"in progress")

    outcome = _run_entrypoint(staging_dir, tmp_path)

    assert outcome.returncode == 0, f"entrypoint.sh exited {outcome.returncode}\nstderr: {outcome.stderr}"
    assert in_flight.exists(), "recent file should not have been deleted by entrypoint.sh"
def test_zipslip_still_anchors_under_custom_tmpdir(tmp_path):
"""_validate_zip_entry rejects path-traversal when extract_dir is under a custom TMPDIR.