From a4f2047bccb607172f65b52c9ad4796e88fda6ea Mon Sep 17 00:00:00 2001
From: Marcel
Date: Mon, 11 May 2026 14:07:16 +0200
Subject: [PATCH] security(ocr): pin ALLOWED_PDF_HOSTS=minio in prod ocr-service env
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Production never sources PDFs from localhost or 127.0.0.1 — the OCR
service only reads from MinIO over the internal docker network. The
Python default (`minio,localhost,127.0.0.1`) was permissive on purpose
for local dev, but in production a future change to that default — or
a host-env override — would silently broaden the SSRF surface. Pinning
the env var explicitly here freezes the allowlist to the one hostname
production actually needs.

`docker compose config --quiet` and `--profile staging config --quiet`
both still pass. Verified the resolved config emits
`ALLOWED_PDF_HOSTS: minio`.

Addresses @nora's round-2 suggestion on PR #499 — "five characters of
YAML, lifetime guarantee".

Co-Authored-By: Claude Opus 4.7
---
 docker-compose.prod.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index 626d44a5..b821ec33 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -128,6 +128,11 @@ services:
       TRAINING_TOKEN: ${OCR_TRAINING_TOKEN}
       OCR_CONFIDENCE_THRESHOLD: "0.3"
       OCR_CONFIDENCE_THRESHOLD_KURRENT: "0.5"
+      # SSRF allowlist pinned explicitly to the internal MinIO hostname.
+      # In prod the OCR service only fetches PDFs from MinIO over the
+      # docker network; localhost/127.0.0.1 are dev-only sources and
+      # must NOT be reachable here. Do not widen to `*`.
+      ALLOWED_PDF_HOSTS: "minio"
     networks:
       - archive-net
     healthcheck: