feat: OCR pipeline with NDJSON streaming and real-time progress (#226, #227, #231) #229

Merged
marcel merged 74 commits from feat/issue-226-227-ocr-pipeline-polygon into main 2026-04-13 12:39:04 +02:00
2 changed files with 3 additions and 0 deletions
Showing only changes of commit 37abc376ec - Show all commits

View File

@@ -9,8 +9,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
&& rm -rf /var/lib/apt/lists/*
# Install torch and torchvision in a layer of their own, resolved against the
# PyTorch CPU wheel index (whl/cpu), which carries no CUDA variants; the
# original note puts the saving at roughly 2 GB.
# Both packages are pulled from that same index so that torchvision matches
# torch's operator registrations.
RUN pip install \
    --index-url https://download.pytorch.org/whl/cpu \
    --no-cache-dir \
    torch==2.7.1 torchvision==0.22.1
# Bring the dependency manifest into the current working directory.
COPY requirements.txt ./

View File

@@ -2,6 +2,7 @@ fastapi[standard]==0.115.6
surya-ocr==0.17.1
kraken==7.0
# torch and torchvision are pinned to the same versions the Dockerfile installs
# from the PyTorch CPU wheel index (https://download.pytorch.org/whl/cpu).
# Keep the two places in sync when bumping either package.
# NOTE(review): presumably the later requirements install then reuses those
# CPU builds instead of fetching new wheels — confirm against the full Dockerfile.
torch==2.7.1
torchvision==0.22.1
transformers>=4.56.1,<5.0.0
pillow>=10.2.0,<11.0.0
pypdfium2==4.30.0