Files
familienarchiv/docker-compose.yml
Marcel ab24786d2a security(ocr): harden compose — fix cache volume path, add read_only + cap_drop
Move ocr_cache mount from /root/.cache to /app/cache (correct path for
non-root user). Add HF_HOME so Hugging Face resolves to the same path.
Add runtime hardening: read_only, tmpfs /tmp (512 MB cap), cap_drop ALL,
no-new-privileges.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-17 16:47:18 +02:00

217 lines
7.2 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
services:
# --- Database: PostgreSQL ---
# User, password and database name come from the environment / .env file.
# The data directory is bind-mounted to ./data/postgres on the host.
db:
  image: postgres:16-alpine
  container_name: archive-db
  restart: unless-stopped
  networks:
    - archiv-net
  ports:
    - "${PORT_DB}:5432"
  volumes:
    - ./data/postgres:/var/lib/postgresql/data
  environment:
    POSTGRES_DB: "${POSTGRES_DB}"
    POSTGRES_USER: "${POSTGRES_USER}"
    POSTGRES_PASSWORD: "${POSTGRES_PASSWORD}"
  # Healthy once pg_isready succeeds for the configured user and database.
  healthcheck:
    test:
      - CMD-SHELL
      - "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"
    interval: 5s
    timeout: 5s
    retries: 5
# --- Object storage: MinIO (S3-compatible) ---
# NOTE(review): the image uses the floating `latest` tag; consider pinning a
# specific release so rebuilds are reproducible.
minio:
  image: minio/minio:latest
  container_name: archive-minio
  restart: unless-stopped
  # Serves objects from /data; the web console listens on port 9001.
  command: server /data --console-address ":9001"
  networks:
    - archiv-net
  ports:
    - "${PORT_MINIO_API}:9000"  # S3 API
    - "${PORT_MINIO_CONSOLE}:9001"  # web console
  volumes:
    - ./data/minio:/data
  environment:
    MINIO_ROOT_USER: "${MINIO_ROOT_USER}"
    MINIO_ROOT_PASSWORD: "${MINIO_ROOT_PASSWORD}"
  # Probes the liveness endpoint with curl from inside the container.
  healthcheck:
    test:
      - CMD
      - curl
      - "-f"
      - "http://localhost:9000/minio/health/live"
    interval: 30s
    timeout: 20s
    retries: 3
# --- Helper: creates the default bucket automatically ---
# One-shot job that starts only after MinIO reports healthy.
# NOTE(review): the image is untagged (implicit `latest`); consider pinning it.
create-buckets:
  image: minio/mc
  depends_on:
    minio:
      condition: service_healthy
  networks:
    - archiv-net
  # Steps: register the `myminio` alias, create the bucket unless it already
  # exists, then set anonymous access on it to private.
  # Compose substitutes the ${...} values before /bin/sh sees the script, so
  # each one is wrapped in single quotes: spaces and shell metacharacters
  # ($, &, ;, ...) in the credentials or bucket name then reach mc verbatim.
  # Values that themselves contain a quote character are still unsupported.
  # `exit 0` keeps the job best-effort: it reports success even if a step fails.
  entrypoint: >
    /bin/sh -c "
    /usr/bin/mc alias set myminio http://minio:9000 '${MINIO_ROOT_USER}' '${MINIO_ROOT_PASSWORD}';
    /usr/bin/mc mb 'myminio/${MINIO_DEFAULT_BUCKETS}' --ignore-existing;
    /usr/bin/mc anonymous set private 'myminio/${MINIO_DEFAULT_BUCKETS}';
    exit 0;
    "
# --- Mail catcher: Mailpit (dev only) ---
# Intercepts every outgoing e-mail and shows it in a web inbox.
# Once the stack is running, open http://localhost:${PORT_MAILPIT_UI}.
mailpit:
  image: axllent/mailpit:latest
  container_name: archive-mailpit
  restart: unless-stopped
  networks:
    - archiv-net
  ports:
    - "${PORT_MAILPIT_SMTP:-1025}:1025"  # SMTP
    - "${PORT_MAILPIT_UI:-8025}:8025"  # web UI
# --- OCR: Python microservice (Surya + Kraken) ---
# Must run as a single instance: the model is reloaded in-process after every
# training run, so several replicas would conflict during training and drift
# apart in model state. ADR-001 records the architectural rationale.
ocr-service:
  build:
    context: ./ocr-service
    dockerfile: Dockerfile
  container_name: archive-ocr
  restart: unless-stopped
  networks:
    - archiv-net
  # Container port only; no host port is published for this service.
  expose:
    - "8000"
  # memswap_limit is set to the same value as mem_limit.
  mem_limit: 12g
  memswap_limit: 12g

  # Runtime hardening: read-only root filesystem, all capabilities dropped,
  # no privilege escalation.
  read_only: true
  cap_drop:
    - ALL
  security_opt:
    - no-new-privileges:true
  # Writable scratch space: the training endpoints write ZIPs to /tmp, capped
  # at 512 MB for typical batches.
  # NOTE(review): the previous comment read "2050 images", presumably a
  # garbled "20-50 images" - confirm the intended batch size.
  tmpfs:
    - /tmp:size=512m
  # Named volumes are the only other writable paths. The cache lives under
  # /app/cache (not /root/.cache) because the service runs as a non-root user.
  volumes:
    - ocr_models:/app/models
    - ocr_cache:/app/cache

  environment:
    # Points Hugging Face at the mounted cache volume.
    HF_HOME: "/app/cache"
    KRAKEN_MODEL_PATH: "/app/models/german_kurrent.mlmodel"
    # Empty when OCR_TRAINING_TOKEN is not set.
    TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"
    OCR_CONFIDENCE_THRESHOLD: "0.3"
    OCR_CONFIDENCE_THRESHOLD_KURRENT: "0.5"
    RECOGNITION_BATCH_SIZE: "16"
    DETECTOR_BATCH_SIZE: "8"
    # CLAHE contrast limit (multiplier of the average histogram frequency).
    OCR_CLAHE_CLIP_LIMIT: "2.0"
    # CLAHE tile grid size (NxN tiles per page).
    OCR_CLAHE_TILE_SIZE: "8"
    # LRU cache; each model is ~500 MB, so 2 models = ~1 GB resident.
    OCR_MAX_CACHED_MODELS: "2"

  # 120 s start period before failed probes count against the retries.
  healthcheck:
    test:
      - CMD
      - curl
      - "-f"
      - "http://localhost:8000/health"
    interval: 10s
    timeout: 5s
    retries: 12
    start_period: 120s
# --- Backend: Spring Boot ---
backend:
  build:
    context: ./backend
    dockerfile: Dockerfile
  container_name: archive-backend
  restart: unless-stopped
  # db and minio must be healthy; mailpit and ocr-service only need to have
  # been started.
  depends_on:
    db:
      condition: service_healthy
    minio:
      condition: service_healthy
    mailpit:
      condition: service_started
    ocr-service:
      condition: service_started
  networks:
    - archiv-net
  ports:
    - "${PORT_BACKEND}:8080"
  # Management port: Prometheus scrapes /actuator/prometheus from inside
  # archiv-net. It is not published to the host; the Docker service-name DNS
  # entry (backend:8081) is sufficient.
  expose:
    - "8081"
  volumes:
    - ./import:/import

  environment:
    SPRING_PROFILES_ACTIVE: "dev,e2e"
    APP_BASE_URL: "${APP_BASE_URL:-http://localhost:3000}"

    # Database
    SPRING_DATASOURCE_URL: "jdbc:postgresql://db:5432/${POSTGRES_DB}"
    SPRING_DATASOURCE_USERNAME: "${POSTGRES_USER}"
    SPRING_DATASOURCE_PASSWORD: "${POSTGRES_PASSWORD}"

    # Object storage (MinIO via its S3 API)
    # NOTE(review): the backend is given the MinIO root credentials; consider
    # a dedicated access key outside of dev.
    S3_ENDPOINT: "http://minio:9000"
    S3_ACCESS_KEY: "${MINIO_ROOT_USER}"
    S3_SECRET_KEY: "${MINIO_ROOT_PASSWORD}"
    S3_BUCKET_NAME: "${MINIO_DEFAULT_BUCKETS}"
    S3_REGION: "us-east-1"

    # Mail: defaults target the local Mailpit catcher; set these in .env to
    # use a production SMTP server instead.
    MAIL_HOST: "${MAIL_HOST:-mailpit}"
    MAIL_PORT: "${MAIL_PORT:-1025}"
    MAIL_USERNAME: "${MAIL_USERNAME:-}"
    MAIL_PASSWORD: "${MAIL_PASSWORD:-}"
    APP_MAIL_FROM: "${APP_MAIL_FROM:-noreply@familienarchiv.local}"
    # Mailpit needs neither auth nor STARTTLS; production SMTP overrides both
    # through .env.
    SPRING_MAIL_PROPERTIES_MAIL_SMTP_AUTH: "${MAIL_SMTP_AUTH:-false}"
    SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: "${MAIL_STARTTLS_ENABLE:-false}"

    # OCR microservice
    APP_OCR_BASE_URL: "http://ocr-service:8000"
    APP_OCR_TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"

    # Error reporting
    SENTRY_DSN: "${SENTRY_DSN:-}"
    SENTRY_TRACES_SAMPLE_RATE: "${SENTRY_TRACES_SAMPLE_RATE:-1.0}"

    # Observability: traces go to Tempo inside archiv-net (OTLP gRPC, port 4317).
    # Tempo is defined in docker-compose.observability.yml (future issue).
    # OTLP failures are non-fatal; the backend starts cleanly without the
    # observability stack.
    OTEL_EXPORTER_OTLP_ENDPOINT: "http://tempo:4317"
    # 10% sampling in this compose file (dev + staging); override locally to
    # 1.0 if needed.
    MANAGEMENT_TRACING_SAMPLING_PROBABILITY: "0.1"

  # Healthy when the actuator health response contains "UP".
  # start_period: the JAR starts in ~15s; it was 60s when compilation happened
  # at startup.
  healthcheck:
    test:
      - CMD-SHELL
      - "wget -qO- http://localhost:8080/actuator/health | grep -q UP || exit 1"
    interval: 15s
    timeout: 5s
    retries: 10
    start_period: 30s
# --- Frontend: SvelteKit (dev server) ---
frontend:
  build:
    context: ./frontend
    dockerfile: Dockerfile
    # The Dockerfile is multi-stage; without an explicit target the production
    # stage would be built.
    target: development
  container_name: archive-frontend
  restart: unless-stopped
  # Starts only after db, minio and backend all report healthy.
  depends_on:
    db:
      condition: service_healthy
    minio:
      condition: service_healthy
    backend:
      condition: service_healthy
  networks:
    - archiv-net
  ports:
    - "${PORT_FRONTEND}:5173"
  volumes:
    # Source tree from the host.
    - ./frontend:/app
    # node_modules stays in a named volume, separate from the host, to avoid
    # OS-specific binary conflicts.
    - frontend_node_modules:/app/node_modules
  environment:
    # Server-side (SSR) calls go over the internal Docker network.
    API_INTERNAL_URL: "http://backend:8080"
    # The Vite dev proxy forwards /api requests from the browser to the
    # backend container.
    API_PROXY_TARGET: "http://backend:8080"
# Bridge network that every service in this file attaches to.
networks:
  archiv-net:
    driver: bridge
# Named volumes with default settings.
volumes:
  # Mounted at /app/node_modules in the frontend service.
  frontend_node_modules: {}
  # Mounted at /app/models in ocr-service.
  ocr_models: {}
  # Mounted at /app/cache in ocr-service.
  ocr_cache: {}