From 7769dbc9f46a574e073ebe00add1bf263419d679 Mon Sep 17 00:00:00 2001 From: Marcel Date: Sun, 17 May 2026 17:43:18 +0200 Subject: [PATCH] security(ocr): apply container hardening baseline to docker-compose.prod.yml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror the CIS Docker §4.1/§4.6 hardening from docker-compose.yml to the production/staging compose file, which is standalone (not an overlay). - Fix cache volume mount path: ocr-cache:/root/.cache → /app/cache (matches the non-root user's HF_HOME/XDG_CACHE_HOME, avoids PermissionError) - Add HF_HOME, XDG_CACHE_HOME, TORCH_HOME env vars so HuggingFace, ketos, and PyTorch all write to the declared writable volumes, not HOME - Add read_only: true, tmpfs (/tmp:512m), cap_drop: [ALL], no-new-privileges:true — matching the dev baseline Also extend DEPLOYMENT.md §8 upgrade notes to cover all three environments (dev/production/staging), each with its correct project-namespaced volume name. Co-Authored-By: Claude Sonnet 4.6 --- docker-compose.prod.yml | 12 +++++++++++- docs/DEPLOYMENT.md | 11 +++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 9fcb453f..dbae6e9a 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -142,8 +142,11 @@ services: memswap_limit: ${OCR_MEM_LIMIT:-12g} volumes: - ocr-models:/app/models - - ocr-cache:/root/.cache + - ocr-cache:/app/cache # HuggingFace / ketos cache — prevents re-downloads on recreate (HF_HOME) environment: + HF_HOME: /app/cache + XDG_CACHE_HOME: /app/cache + TORCH_HOME: /app/models/torch KRAKEN_MODEL_PATH: /app/models/german_kurrent.mlmodel TRAINING_TOKEN: ${OCR_TRAINING_TOKEN} OCR_CONFIDENCE_THRESHOLD: "0.3" @@ -161,6 +164,13 @@ services: timeout: 5s retries: 12 start_period: 120s + read_only: true + tmpfs: + - /tmp:size=512m # training endpoints write ZIPs to /tmp; 512 MB covers typical batches (20–50 images) + cap_drop: + - ALL + 
security_opt: + - no-new-privileges:true backend: image: familienarchiv/backend:${TAG:-nightly} diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index 9da72276..aaba04e2 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -566,12 +566,19 @@ Version-specific one-time steps that must be run before or after upgrading to a ### Upgrading to PR #611 — non-root OCR container -The OCR cache volume path changed from `/root/.cache` to `/app/cache` (PR #611 — CIS Docker §4.1 hardening). The existing `ocr_cache` volume was written as root and is inaccessible to the new non-root `ocr` user, causing a `PermissionError` on startup. +The OCR cache volume path changed from `/root/.cache` to `/app/cache` (PR #611 — CIS Docker §4.1 hardening). The existing volume was written as root and is inaccessible to the new non-root `ocr` user, causing a `PermissionError` on startup. -**Before starting the updated container stack**, drop the old root-owned volume: +**Before starting the updated container stack**, stop and remove the old containers (`docker compose down` with the environment's usual `-f`/`-p` flags — Docker refuses to remove a volume that any container, even a stopped one, still references), then drop the old root-owned volume. The volume name depends on the compose project name: ```bash +# Dev (docker-compose.yml — project name: familienarchiv) docker volume rm familienarchiv_ocr_cache + +# Production (docker-compose.prod.yml -p archiv-production) +docker volume rm archiv-production_ocr-cache + +# Staging (docker-compose.prod.yml -p archiv-staging) +docker volume rm archiv-staging_ocr-cache ``` The volume is recreated automatically on `docker compose up`. The OCR service will re-download its model cache on first startup (approximately 1–2 GB, one-time cost).