"""Tests for TMPDIR configuration and entrypoint mkdir behavior — ADR-021.""" import os import subprocess import tempfile import pytest from fastapi import HTTPException from utils import _validate_zip_entry _ENTRYPOINT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "entrypoint.sh") def test_tempfile_uses_tmpdir_when_set(monkeypatch, tmp_path): """Python honours the TMPDIR env var when creating temporary directories. Documents the mechanism that routes Surya model staging to the persistent cache volume instead of the 512 MB RAM tmpfs. See ADR-021. """ custom_tmp = tmp_path / "model_staging" custom_tmp.mkdir() monkeypatch.setenv("TMPDIR", str(custom_tmp)) monkeypatch.setattr(tempfile, "tempdir", None) with tempfile.TemporaryDirectory() as td: assert td.startswith(str(custom_tmp)) def test_entrypoint_creates_tmpdir(tmp_path): """entrypoint.sh creates the TMPDIR directory when it does not exist. On a fresh ocr_cache volume, /app/cache/.tmp is absent. The entrypoint must create it before uvicorn starts so the first Surya model download does not exhaust the 512 MB /tmp tmpfs (ENOSPC). See ADR-021. """ custom_tmp = tmp_path / "model-staging" assert not custom_tmp.exists(), "pre-condition: directory must not exist yet" stub_bin = tmp_path / "stub_bin" stub_bin.mkdir() for name in ("python3", "uvicorn"): stub = stub_bin / name stub.write_text("#!/bin/sh\nexit 0\n") stub.chmod(0o755) env = { **os.environ, "TMPDIR": str(custom_tmp), "PATH": f"{stub_bin}:{os.environ.get('PATH', '/usr/bin:/bin')}", } result = subprocess.run( ["bash", _ENTRYPOINT], env=env, capture_output=True, text=True, ) assert custom_tmp.exists(), ( f"entrypoint.sh did not create TMPDIR={custom_tmp}\n" f"stdout: {result.stdout}\nstderr: {result.stderr}" ) @pytest.mark.skipif( not os.environ.get("TMPDIR", "").startswith("/app/cache"), reason="TMPDIR contract only enforced inside the OCR Docker container", ) def test_tmpdir_is_inside_persistent_cache_volume(): """TMPDIR must point to the persistent cache volume, not a RAM tmpfs. Catches accidental reversion to /tmp or any tmpfs-backed path. Runs only inside the OCR Docker container where TMPDIR=/app/cache/.tmp. See ADR-021. """ tmpdir = os.environ["TMPDIR"] assert tmpdir.startswith("/app/cache"), ( f"TMPDIR={tmpdir!r} must be under /app/cache to route model downloads " "to the SSD-backed cache volume — see ADR-021" ) def test_zipslip_still_anchors_under_custom_tmpdir(tmp_path): """_validate_zip_entry rejects path-traversal when extract_dir is under a custom TMPDIR. When TMPDIR=/app/cache/.tmp, extraction dirs live under that path. Verifies os.path.realpath() still anchors correctly against the non-default base. """ extract_dir = tmp_path / "model-staging" / "tmpXXX" extract_dir.mkdir(parents=True) with pytest.raises(HTTPException) as exc_info: _validate_zip_entry("../evil.py", str(extract_dir)) assert exc_info.value.status_code == 400