feat: OCR pipeline with NDJSON streaming and real-time progress (#226, #227, #231) #229

Merged
marcel merged 74 commits from feat/issue-226-227-ocr-pipeline-polygon into main 2026-04-13 12:39:04 +02:00
2 changed files with 3 additions and 1 deletion
Showing only changes of commit 31519af1a4 - Show all commits

View File

@@ -2,10 +2,11 @@ FROM python:3.11-slim
WORKDIR /app
# curl for healthcheck; libgomp1 for PyTorch CPU threading
# curl for healthcheck; libgomp1 for PyTorch CPU threading; libvips for kraken PDF support
# Install the OS-level packages in a single layer; the apt package index is
# removed in the same layer so it does not persist in the image.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        curl \
        libgomp1 \
        libvips42 \
    && rm -rf /var/lib/apt/lists/*
# PyTorch CPU-only — separate layer; the whl/cpu index strips all CUDA variants (~2 GB saved)

View File

@@ -6,4 +6,5 @@ torchvision==0.22.1
transformers>=4.56.1,<5.0.0
pillow>=10.2.0,<11.0.0
pypdfium2==4.30.0
pyvips>=2.2.0
httpx==0.28.1