# Python 3.11 (Debian slim) image with CPU-only PyTorch.
# Runs /app/entrypoint.sh as the unprivileged `ocr` user; the service port is 8000.
FROM python:3.11.9-slim

WORKDIR /app

# curl for healthcheck; libgomp1 for PyTorch CPU threading; libvips for kraken PDF support
# libglib2.0-0 is required by opencv-python-headless on Debian slim
# NOTE(review): no HEALTHCHECK instruction is defined in this file — presumably the
# probe that uses curl lives in the orchestrator/compose config; confirm.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        libglib2.0-0 \
        libgomp1 \
        libvips42 \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged runtime user. Created before any COPY so files can receive their
# final owner at copy time (COPY --chown) rather than through a later
# `chown -R /app`, which would store every application file a second time in a
# new layer. Only the directories themselves are chowned here (non-recursive):
#   /home/ocr    -> HOME for the runtime user (useradd does not create it)
#   /app/models  -> parent of TORCH_HOME
#   /app/cache   -> HF_HOME and XDG_CACHE_HOME
#   /app         -> kept owned by ocr, as the original recursive chown left it
RUN useradd --no-create-home --shell /usr/sbin/nologin --uid 1000 ocr \
    && mkdir -p /home/ocr /app/models /app/cache \
    && chown ocr:ocr /app /home/ocr /app/models /app/cache

# PyTorch CPU-only — separate layer; the whl/cpu index strips all CUDA variants (~2 GB saved)
# torchvision must also come from the CPU index to match torch's operator registrations
RUN pip install --no-cache-dir \
        torch==2.7.1 \
        torchvision==0.22.1 \
        --index-url https://download.pytorch.org/whl/cpu

# Dependency manifest is copied on its own so the install layer below stays
# cached until requirements.txt itself changes.
# NOTE(review): requirements.txt is not visible here — verify it does not pin a
# different torch/torchvision, which would replace the CPU-only wheels above.
COPY --chown=ocr:ocr requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application source, owned by the runtime user from the moment it is copied.
COPY --chown=ocr:ocr . .

# Make sure the entrypoint is executable regardless of the mode bits in the
# build context.
RUN chmod +x /app/entrypoint.sh

# Point HOME and the cache/model locations at the directories created for the
# runtime user above.
ENV HOME=/home/ocr \
    HF_HOME=/app/cache \
    XDG_CACHE_HOME=/app/cache \
    TORCH_HOME=/app/models/torch

# Drop root for everything that runs in the container.
USER ocr

# Documentation only; the port still has to be published at run time.
EXPOSE 8000

# Exec form: the entrypoint script is started directly, without a wrapping shell.
CMD ["/app/entrypoint.sh"]