test_entrypoint_removes_day_old_orphans and test_entrypoint_preserves_fresh_files verify the find -mtime +1 -delete logic using os.utime() to fabricate old mtimes without mocking system time. Also extracts _run_entrypoint helper to remove subprocess setup duplication. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
151 lines
5.4 KiB
Python
151 lines
5.4 KiB
Python
"""Tests for TMPDIR configuration and entrypoint mkdir behavior — ADR-021."""
|
|
|
|
import os
|
|
import subprocess
|
|
import tempfile
|
|
import time
|
|
|
|
import pytest
|
|
|
|
from fastapi import HTTPException
|
|
from utils import _validate_zip_entry
|
|
|
|
# Absolute path of the entrypoint.sh script, looked up in the same directory
# as this test module so the tests do not depend on the current working directory.
_ENTRYPOINT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "entrypoint.sh")
|
|
|
|
|
|
def _run_entrypoint(tmpdir, tmp_path):
    """Execute entrypoint.sh under bash with TMPDIR pointing at ``tmpdir``.

    No-op ``python3`` and ``uvicorn`` shell stubs are written into
    ``tmp_path / "stub_bin"`` and that directory is placed first on PATH, so
    PATH lookups of those two command names resolve to the stubs.

    Args:
        tmpdir: Directory path exported to the script as TMPDIR.
        tmp_path: Scratch directory (path-like supporting ``/``) that hosts
            the stub executables.

    Returns:
        The ``subprocess.CompletedProcess`` of the bash run, with stdout and
        stderr captured as text.
    """
    shim_dir = tmp_path / "stub_bin"
    # exist_ok lets several calls share one tmp_path without failing.
    shim_dir.mkdir(exist_ok=True)
    for command in ("python3", "uvicorn"):
        shim = shim_dir / command
        shim.write_text("#!/bin/sh\nexit 0\n")
        shim.chmod(0o755)

    inherited_path = os.environ.get("PATH", "/usr/bin:/bin")
    child_env = dict(os.environ)
    child_env["TMPDIR"] = str(tmpdir)
    # Stub directory goes first so it wins the PATH lookup.
    child_env["PATH"] = f"{shim_dir}:{inherited_path}"

    return subprocess.run(
        ["bash", _ENTRYPOINT],
        env=child_env,
        capture_output=True,
        text=True,
    )
|
|
|
|
|
|
def test_tempfile_uses_tmpdir_when_set(monkeypatch, tmp_path):
    """tempfile creates new temporary directories under TMPDIR when it is set.

    This is the mechanism ADR-021 relies on to route Surya model staging to
    the persistent cache volume instead of the 512 MB RAM tmpfs.
    """
    staging_root = tmp_path / "model_staging"
    staging_root.mkdir()

    monkeypatch.setenv("TMPDIR", str(staging_root))
    # tempfile remembers its chosen directory in the module-level ``tempdir``
    # variable; resetting it to None makes the next call consult TMPDIR again.
    monkeypatch.setattr(tempfile, "tempdir", None)

    expected_prefix = str(staging_root)
    with tempfile.TemporaryDirectory() as created:
        assert created.startswith(expected_prefix)
|
|
|
|
|
|
def test_entrypoint_creates_tmpdir(tmp_path):
    """entrypoint.sh creates the TMPDIR directory when it does not exist.

    On a fresh ocr_cache volume, /app/cache/.tmp is absent. The entrypoint
    must create it before uvicorn starts so the first Surya model download
    does not exhaust the 512 MB /tmp tmpfs (ENOSPC). See ADR-021.
    """
    custom_tmp = tmp_path / "model-staging"
    assert not custom_tmp.exists(), "pre-condition: directory must not exist yet"

    # Reuse the shared helper rather than repeating the stub/PATH/subprocess
    # setup inline; it stubs python3 and uvicorn and runs the script via bash.
    result = _run_entrypoint(custom_tmp, tmp_path)

    assert result.returncode == 0, (
        f"entrypoint.sh exited {result.returncode}\n"
        f"stdout: {result.stdout}\nstderr: {result.stderr}"
    )
    # is_dir() rather than exists(): a regular file at this path would not be
    # usable as TMPDIR, so it must not satisfy the test.
    assert custom_tmp.is_dir(), (
        f"entrypoint.sh did not create TMPDIR={custom_tmp}\n"
        f"stdout: {result.stdout}\nstderr: {result.stderr}"
    )
|
|
|
|
|
|
@pytest.mark.skipif(
    not os.environ.get("TMPDIR", "").startswith("/app/cache"),
    reason="TMPDIR contract only enforced inside the OCR Docker container",
)
def test_tmpdir_is_inside_persistent_cache_volume():
    """TMPDIR must point to the persistent cache volume, not a RAM tmpfs.

    Catches accidental reversion to /tmp or any tmpfs-backed path.
    Runs only inside the OCR Docker container where TMPDIR=/app/cache/.tmp.
    See ADR-021.

    The skip guard above already requires the raw "/app/cache" string prefix,
    so repeating that prefix test here could never fail. The assertion instead
    compares normalized path components, which also rejects look-alike
    siblings (e.g. /app/cache2) and paths that escape via "..".
    """
    cache_root = os.path.normpath("/app/cache")
    tmpdir = os.environ["TMPDIR"]
    # normpath collapses "." / ".." / duplicate separators without touching
    # the filesystem, so the check does not depend on the directory existing.
    normalized = os.path.normpath(tmpdir)
    assert os.path.commonpath([cache_root, normalized]) == cache_root, (
        f"TMPDIR={tmpdir!r} must be under /app/cache to route model downloads "
        "to the SSD-backed cache volume — see ADR-021"
    )
|
|
|
|
|
|
def test_entrypoint_removes_day_old_orphans(tmp_path):
    """entrypoint.sh purges stale partial downloads from TMPDIR.

    Reproduces a file abandoned by a docker-kill in the middle of a download:
    the file's atime and mtime are pushed 2 days into the past with
    os.utime() (no clock mocking), the entrypoint is run, and the file must
    no longer exist afterwards. See ADR-021.
    """
    staging = tmp_path / "staging"
    staging.mkdir()
    orphan = staging / "model.safetensors.partial"
    orphan.write_bytes(b"partial download")

    seconds_per_day = 24 * 3600
    backdated = time.time() - 2 * seconds_per_day
    os.utime(orphan, times=(backdated, backdated))

    outcome = _run_entrypoint(staging, tmp_path)

    assert outcome.returncode == 0, f"entrypoint.sh exited {outcome.returncode}\nstderr: {outcome.stderr}"
    assert not orphan.exists(), "day-old orphan should have been deleted by entrypoint.sh"
|
|
|
|
|
|
def test_entrypoint_preserves_fresh_files(tmp_path):
    """entrypoint.sh leaves recently modified files in TMPDIR untouched.

    A download that is still in progress has a recent mtime and must survive
    the orphan cleanup, so a concurrent or just-started model fetch is not
    interrupted. See ADR-021.
    """
    staging = tmp_path / "staging"
    staging.mkdir()
    in_flight = staging / "model.safetensors.part"
    # Freshly written, so its mtime is "now"; no os.utime() adjustment needed.
    in_flight.write_bytes(b"in progress")

    outcome = _run_entrypoint(staging, tmp_path)

    assert outcome.returncode == 0, f"entrypoint.sh exited {outcome.returncode}\nstderr: {outcome.stderr}"
    assert in_flight.exists(), "recent file should not have been deleted by entrypoint.sh"
|
|
|
|
|
|
def test_zipslip_still_anchors_under_custom_tmpdir(tmp_path):
    """_validate_zip_entry rejects path traversal when extract_dir is under a custom TMPDIR.

    With TMPDIR=/app/cache/.tmp, extraction directories live under that path
    rather than the default temp location. The test builds such a nested
    directory and expects a "../" entry to be refused with HTTP 400.
    NOTE(review): the validator is presumably anchored via os.path.realpath();
    its implementation is not visible from this module.
    """
    extraction_root = tmp_path / "model-staging" / "tmpXXX"
    extraction_root.mkdir(parents=True)
    traversal_entry = "../evil.py"

    with pytest.raises(HTTPException) as rejected:
        _validate_zip_entry(traversal_entry, str(extraction_root))

    # Checked after the context manager exits; inside it, the line would be
    # unreachable once the call above raises.
    assert rejected.value.status_code == 400
|