From 615d404ba9b1767dd51b9187cc8a2fc59745ee71 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 17 Apr 2026 14:14:47 +0200 Subject: [PATCH] chore(ocr): add opencv-python-headless, libglib2.0-0, and CLAHE env vars Co-Authored-By: Claude Sonnet 4.6 --- docker-compose.yml | 2 ++ ocr-service/Dockerfile | 2 ++ ocr-service/requirements.txt | 1 + 3 files changed, 5 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index 1a55f04d..e9105e3a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -95,6 +95,8 @@ services: OCR_CONFIDENCE_THRESHOLD_KURRENT: "0.5" RECOGNITION_BATCH_SIZE: "16" DETECTOR_BATCH_SIZE: "8" + OCR_CLAHE_CLIP_LIMIT: "2.0" # CLAHE contrast limit (multiplier of average histogram frequency) + OCR_CLAHE_TILE_SIZE: "8" # CLAHE tile grid size (NxN tiles per page) networks: - archive-net healthcheck: diff --git a/ocr-service/Dockerfile b/ocr-service/Dockerfile index 25d383a4..2de1d862 100644 --- a/ocr-service/Dockerfile +++ b/ocr-service/Dockerfile @@ -3,10 +3,12 @@ FROM python:3.11.9-slim WORKDIR /app # curl for healthcheck; libgomp1 for PyTorch CPU threading; libvips for kraken PDF support +# libglib2.0-0 is required by opencv-python-headless on Debian slim RUN apt-get update && apt-get install -y --no-install-recommends \ curl \ libgomp1 \ libvips42 \ + libglib2.0-0 \ && rm -rf /var/lib/apt/lists/* # PyTorch CPU-only — separate layer; the whl/cpu index strips all CUDA variants (~2 GB saved) diff --git a/ocr-service/requirements.txt b/ocr-service/requirements.txt index 5c3023d8..17028f83 100644 --- a/ocr-service/requirements.txt +++ b/ocr-service/requirements.txt @@ -8,3 +8,4 @@ pillow>=10.2.0,<11.0.0 pypdfium2==4.30.0 pyvips>=2.2.0 httpx==0.28.1 +opencv-python-headless==4.11.0.86