# familienarchiv/ocr-service/Dockerfile

# Base image: official CPython 3.11 on Debian slim.
# The Debian release is pinned (bookworm) instead of using the floating
# "3.11-slim" tag: the apt package names installed later in this file
# (e.g. libvips42) are specific to a Debian release, so letting the tag roll
# over to a newer release can change or break the build without any edit here.
FROM python:3.11-slim-bookworm

# All following relative paths (COPY destinations, RUN commands) resolve
# against /app; WORKDIR creates the directory if it does not exist.
WORKDIR /app

# OS-level runtime dependencies:
#   curl      - used by the healthcheck
#   libgomp1  - PyTorch CPU threading
#   libvips42 - libvips, for kraken PDF support
# The apt package index is deleted in the same layer so it is never stored
# in the image.
RUN set -e; \
    apt-get update; \
    apt-get install --yes --no-install-recommends \
        curl \
        libgomp1 \
        libvips42; \
    rm -rf /var/lib/apt/lists/*

# CPU-only PyTorch, kept in its own layer.
# Installing from the whl/cpu index avoids every CUDA variant (~2 GB saved).
# torchvision is taken from the same CPU index so that it matches torch's
# operator registrations.
RUN pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu \
    torch==2.7.1 \
    torchvision==0.22.1

# Copy only the dependency manifest before the rest of the source, so the
# install layer is rebuilt when requirements.txt changes rather than on every
# source edit.
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt

# Application source, copied after the dependency layers so that code edits
# do not invalidate them.
COPY . /app/

# Ensure the entrypoint script is executable regardless of the file mode it
# has in the build context (path is relative to WORKDIR /app).
RUN chmod +x entrypoint.sh

# Documents 8000 as the container's service port; EXPOSE does not publish it.
EXPOSE 8000/tcp

# NOTE(review): no USER instruction is visible in this file, so the container
# starts as root — confirm whether entrypoint.sh relies on that before
# switching to an unprivileged user.
# Exec form: the script is started directly, without a wrapping /bin/sh -c.
CMD ["/app/entrypoint.sh"]