- POST /train in ocr-service with ZIP Slip validation, TemporaryDirectory, ketos transfer learning, timestamped backups (keep last 3), in-process reload
- X-Training-Token auth (no-op in dev when TRAINING_TOKEN env is empty)
- trainModel() in OcrClient interface + RestClientOcrClient (10-min timeout, multipart upload, forwards X-Training-Token when configured)
- TRAINING_TOKEN env var wired in docker-compose; --workers 2 in Dockerfile so /health stays responsive during synchronous training

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
27 lines
773 B
Docker
27 lines
773 B
Docker
# Image for the OCR service: a uvicorn-served Python app (main:app) on port 8000.
# Layers are ordered least- to most-frequently changing so that edits to the
# application source do not invalidate the OS-package or PyTorch layers.
FROM python:3.11-slim

WORKDIR /app

# curl for healthcheck; libgomp1 for PyTorch CPU threading; libvips for kraken PDF support.
# update + install + list cleanup share one layer so the apt index never persists in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        libgomp1 \
        libvips42 \
    && rm -rf /var/lib/apt/lists/*

# PyTorch CPU-only — separate layer; the whl/cpu index strips all CUDA variants (~2 GB saved).
# torchvision must also come from the CPU index to match torch's operator registrations.
RUN pip install --no-cache-dir \
        torch==2.7.1 \
        torchvision==0.22.1 \
        --index-url https://download.pytorch.org/whl/cpu

# Copy only the dependency manifest first so this layer stays cached until
# requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application source last: it changes most often.
COPY . .

# Documentation only; the port is actually bound by the uvicorn command below.
EXPOSE 8000

# NOTE(review): no USER instruction is set, so the service runs as root.
# Confirm whether the directories the service writes to allow switching to a
# non-root user before adding one.

# Two workers so /health stays responsive while one worker is busy with a
# synchronous training request.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "2"]