All checks were successful
CI / Unit & Component Tests (pull_request) Successful in 3m3s
CI / OCR Service Tests (pull_request) Successful in 18s
CI / Backend Unit Tests (pull_request) Successful in 3m4s
CI / fail2ban Regex (pull_request) Successful in 43s
CI / Semgrep Security Scan (pull_request) Successful in 18s
CI / Compose Bucket Idempotency (pull_request) Successful in 59s
Mirror the CIS Docker §4.1/§4.6 hardening from docker-compose.yml to the production/staging compose file, which is standalone (not an overlay). - Fix cache volume mount path: ocr-cache:/root/.cache → /app/cache (matches the non-root user's HF_HOME/XDG_CACHE_HOME, avoids PermissionError) - Add HF_HOME, XDG_CACHE_HOME, TORCH_HOME env vars so HuggingFace, ketos, and PyTorch all write to the declared writable volumes, not HOME - Add read_only: true, tmpfs (/tmp:512m), cap_drop: [ALL], no-new-privileges:true — matching the dev baseline Also extend DEPLOYMENT.md §8 upgrade notes to cover all three environments (dev/production/staging), each with its correct project-namespaced volume name. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
263 lines
10 KiB
YAML
# Production / staging Docker Compose for Familienarchiv.
#
# This is a self-contained file (not an overlay over docker-compose.yml).
# All services for the prod stack live here. Environment isolation is
# achieved via the docker compose project name:
#
#   production: docker compose -f docker-compose.prod.yml -p archiv-production ...
#   staging:    docker compose -f docker-compose.prod.yml -p archiv-staging --profile staging ...
#
# Volumes, networks and containers are namespaced by the project name,
# so the two environments cohabit cleanly on the same host.
#
# Required env vars (provided by .env.production / .env.staging in CI):
#   TAG                      image tag (release tag or "nightly")
#   PORT_BACKEND,
#   PORT_FRONTEND            host-side ports (bound to 127.0.0.1 only)
#   APP_DOMAIN               e.g. archiv.raddatz.cloud / staging.raddatz.cloud
#   POSTGRES_PASSWORD        Postgres password
#   MINIO_PASSWORD           MinIO root password (admin operations only)
#   MINIO_APP_PASSWORD       MinIO application service-account password
#                            (least-privilege scope: archive bucket only)
#   OCR_TRAINING_TOKEN       token guarding ocr-service /train endpoint
#   APP_ADMIN_USERNAME       seeded admin email (e.g. admin@archiv.raddatz.cloud)
#   APP_ADMIN_PASSWORD       seeded admin password — CRITICAL: locked in on
#                            first deploy because UserDataInitializer only
#                            creates the account if the email does not exist
#   MAIL_HOST, MAIL_PORT,
#   MAIL_USERNAME,
#   MAIL_PASSWORD            SMTP relay (production only; staging uses mailpit)
#   APP_MAIL_FROM            sender address (e.g. noreply@raddatz.cloud)
#   IMPORT_HOST_DIR          absolute host path holding ONLY the ODS
#                            spreadsheet and PDFs for /admin/system mass
#                            import — mounted read-only at /import inside
#                            the backend. Compose refuses to start when
#                            this var is unset, so staging and prod cannot
#                            accidentally share an import source. Must be
#                            readable by the backend container's UID
#                            (currently root via the OpenJDK image — any
#                            world-readable directory works).
networks:
  archiv-net:
    driver: bridge
    # NOTE(review): an explicit `name:` opts this network OUT of compose's
    # project-name namespacing — with the default, archiv-production and
    # archiv-staging would share one network. Presumably CI sets
    # COMPOSE_NETWORK_NAME per environment; confirm against the workflows.
    name: ${COMPOSE_NETWORK_NAME:-archiv-net}
# Named volumes — prefixed with the compose project name, so production and
# staging each get their own copies on the shared host.
volumes:
  postgres-data:
  minio-data:
  ocr-models:
  ocr-cache:
||
services:
  # PostgreSQL — single database `archiv`, credentials from the env file.
  db:
    image: postgres:16-alpine
    restart: unless-stopped
    environment:
      POSTGRES_USER: archiv
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: archiv
    volumes:
      - postgres-data:/var/lib/postgresql/data
    networks:
      - archiv-net
    healthcheck:
      # Backend's depends_on gates on this via service_healthy.
      test: ["CMD-SHELL", "pg_isready -U archiv -d archiv"]
      interval: 10s
      timeout: 5s
      retries: 5
|
||
minio:
|
||
# Pinned MinIO release for reproducible deploys. Bumped manually until
|
||
# Renovate is bootstrapped for these production images (see follow-up issue).
|
||
image: minio/minio:RELEASE.2025-02-28T09-55-16Z
|
||
restart: unless-stopped
|
||
command: server /data --console-address ":9001"
|
||
environment:
|
||
MINIO_ROOT_USER: archiv
|
||
MINIO_ROOT_PASSWORD: ${MINIO_PASSWORD}
|
||
volumes:
|
||
- minio-data:/data
|
||
networks:
|
||
- archiv-net
|
||
healthcheck:
|
||
test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
|
||
interval: 30s
|
||
timeout: 20s
|
||
retries: 3
|
||
|
||
# Idempotent bucket bootstrap + service-account creation.
|
||
# Runs once per `docker compose up` and exits 0. The entrypoint is
|
||
# extracted to infra/minio/bootstrap.sh so the (non-trivial) idempotent
|
||
# logic is readable, reviewable, and unit-testable as a script rather
|
||
# than YAML-escaped shell.
|
||
create-buckets:
|
||
# Custom image bakes bootstrap.sh in at build time. A bind-mount fails on
|
||
# the Docker-out-of-Docker production runner because the host daemon
|
||
# resolves the relative path against the host filesystem, not the
|
||
# runner container's CWD. See #506 + infra/minio/Dockerfile.
|
||
build:
|
||
context: ./infra/minio
|
||
# Declare one-shot intent so `docker compose up -d --wait` treats
|
||
# exited(0) as success rather than "not running, fail". Pair with
|
||
# backend's `service_completed_successfully` dependency below. See #510.
|
||
restart: "no"
|
||
depends_on:
|
||
minio:
|
||
condition: service_healthy
|
||
networks:
|
||
- archiv-net
|
||
environment:
|
||
MINIO_PASSWORD: ${MINIO_PASSWORD}
|
||
MINIO_APP_PASSWORD: ${MINIO_APP_PASSWORD}
|
||
|
||
# Dev-only mail catcher; gated behind the staging profile so production
|
||
# never starts it. Staging workflow runs with `--profile staging`.
|
||
mailpit:
|
||
# Pinned for reproducibility; bumped manually until Renovate is bootstrapped.
|
||
image: axllent/mailpit:v1.29.7
|
||
restart: unless-stopped
|
||
profiles: ["staging"]
|
||
networks:
|
||
- archiv-net
|
||
healthcheck:
|
||
# TCP-port open check via BusyBox `nc`. The previous wget-based probe
|
||
# introduced a non-obvious binary dependency on the mailpit image; a
|
||
# future tag that ships without wget would silently disable the
|
||
# healthcheck. `nc` is part of BusyBox in the upstream image.
|
||
test: ["CMD-SHELL", "nc -z localhost 8025 || exit 1"]
|
||
interval: 10s
|
||
timeout: 5s
|
||
retries: 5
|
||
|
||
ocr-service:
|
||
build:
|
||
context: ./ocr-service
|
||
restart: unless-stopped
|
||
expose:
|
||
- "8000"
|
||
# Surya OCR loads ~5GB of transformer models at startup; first request
|
||
# triggers a further ~1GB Kraken model download into ocr-cache.
|
||
# CX42+ (16 GB RAM) honours the default. On a CX32 (8 GB) override with
|
||
# OCR_MEM_LIMIT=6g (slower first-request, fits the host).
|
||
mem_limit: ${OCR_MEM_LIMIT:-12g}
|
||
memswap_limit: ${OCR_MEM_LIMIT:-12g}
|
||
volumes:
|
||
- ocr-models:/app/models
|
||
- ocr-cache:/app/cache # HuggingFace / ketos cache — prevents re-downloads on recreate (HF_HOME)
|
||
environment:
|
||
HF_HOME: /app/cache
|
||
XDG_CACHE_HOME: /app/cache
|
||
TORCH_HOME: /app/models/torch
|
||
KRAKEN_MODEL_PATH: /app/models/german_kurrent.mlmodel
|
||
TRAINING_TOKEN: ${OCR_TRAINING_TOKEN}
|
||
OCR_CONFIDENCE_THRESHOLD: "0.3"
|
||
OCR_CONFIDENCE_THRESHOLD_KURRENT: "0.5"
|
||
# SSRF allowlist pinned explicitly to the internal MinIO hostname.
|
||
# In prod the OCR service only fetches PDFs from MinIO over the
|
||
# docker network; localhost/127.0.0.1 are dev-only sources and
|
||
# must NOT be reachable here. Do not widen to `*`.
|
||
ALLOWED_PDF_HOSTS: "minio"
|
||
networks:
|
||
- archiv-net
|
||
healthcheck:
|
||
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
|
||
interval: 10s
|
||
timeout: 5s
|
||
retries: 12
|
||
start_period: 120s
|
||
read_only: true
|
||
tmpfs:
|
||
- /tmp:size=512m # training endpoints write ZIPs to /tmp; 512 MB covers typical batches (20–50 images)
|
||
cap_drop:
|
||
- ALL
|
||
security_opt:
|
||
- no-new-privileges:true
|
||
|
||
backend:
|
||
image: familienarchiv/backend:${TAG:-nightly}
|
||
build:
|
||
context: ./backend
|
||
restart: unless-stopped
|
||
depends_on:
|
||
db:
|
||
condition: service_healthy
|
||
minio:
|
||
condition: service_healthy
|
||
ocr-service:
|
||
condition: service_healthy
|
||
# Gate startup on the bucket bootstrap. Without this, backend
|
||
# starts in parallel with create-buckets and may race the policy
|
||
# bind. Also tells compose's `up -d --wait` that create-buckets
|
||
# is a one-shot that must complete successfully. See #510.
|
||
create-buckets:
|
||
condition: service_completed_successfully
|
||
# Bound to localhost only — Caddy fronts external traffic.
|
||
ports:
|
||
- "127.0.0.1:${PORT_BACKEND}:8080"
|
||
# Host path holding the ODS spreadsheet + PDFs for the mass-import endpoint.
|
||
# Read-only; MassImportService only reads (Files.list / Files.walk on /import).
|
||
# Required — no default — so staging and prod cannot accidentally share an
|
||
# import source. CI workflows pin this per-env (see .gitea/workflows/).
|
||
volumes:
|
||
- ${IMPORT_HOST_DIR:?Set IMPORT_HOST_DIR to a host path holding the mass-import payload (ODS + PDFs). See docs/DEPLOYMENT.md.}:/import:ro
|
||
environment:
|
||
SPRING_DATASOURCE_URL: jdbc:postgresql://db:5432/archiv
|
||
SPRING_DATASOURCE_USERNAME: archiv
|
||
SPRING_DATASOURCE_PASSWORD: ${POSTGRES_PASSWORD}
|
||
# Application uses the bucket-scoped service account, not MinIO root.
|
||
S3_ENDPOINT: http://minio:9000
|
||
S3_ACCESS_KEY: archiv-app
|
||
S3_SECRET_KEY: ${MINIO_APP_PASSWORD}
|
||
S3_BUCKET_NAME: familienarchiv
|
||
S3_REGION: us-east-1
|
||
# No SPRING_PROFILES_ACTIVE — base application.yaml is production-ready
|
||
# (Swagger disabled, show-sql off, open-in-view false).
|
||
APP_BASE_URL: https://${APP_DOMAIN}
|
||
APP_ADMIN_USERNAME: ${APP_ADMIN_USERNAME}
|
||
APP_ADMIN_PASSWORD: ${APP_ADMIN_PASSWORD}
|
||
APP_OCR_BASE_URL: http://ocr-service:8000
|
||
APP_OCR_TRAINING_TOKEN: ${OCR_TRAINING_TOKEN}
|
||
MAIL_HOST: ${MAIL_HOST}
|
||
MAIL_PORT: ${MAIL_PORT:-587}
|
||
MAIL_USERNAME: ${MAIL_USERNAME:-}
|
||
MAIL_PASSWORD: ${MAIL_PASSWORD:-}
|
||
APP_MAIL_FROM: ${APP_MAIL_FROM:-noreply@raddatz.cloud}
|
||
SPRING_MAIL_PROPERTIES_MAIL_SMTP_AUTH: ${MAIL_SMTP_AUTH:-true}
|
||
SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-true}
|
||
OTEL_EXPORTER_OTLP_ENDPOINT: http://tempo:4318
|
||
OTEL_LOGS_EXPORTER: none
|
||
OTEL_METRICS_EXPORTER: none
|
||
MANAGEMENT_METRICS_TAGS_APPLICATION: Familienarchiv
|
||
MANAGEMENT_TRACING_SAMPLING_PROBABILITY: ${MANAGEMENT_TRACING_SAMPLING_PROBABILITY:-0.1}
|
||
networks:
|
||
- archiv-net
|
||
healthcheck:
|
||
test: ["CMD-SHELL", "wget -qO- http://localhost:8081/actuator/health | grep -q UP || exit 1"]
|
||
interval: 15s
|
||
timeout: 5s
|
||
retries: 10
|
||
start_period: 30s
|
||
|
||
frontend:
|
||
image: familienarchiv/frontend:${TAG:-nightly}
|
||
build:
|
||
context: ./frontend
|
||
target: production
|
||
restart: unless-stopped
|
||
depends_on:
|
||
backend:
|
||
condition: service_healthy
|
||
ports:
|
||
- "127.0.0.1:${PORT_FRONTEND}:3000"
|
||
environment:
|
||
# SSR fetches go inside the docker network; clients hit https://${APP_DOMAIN}
|
||
API_INTERNAL_URL: http://backend:8080
|
||
ORIGIN: https://${APP_DOMAIN}
|
||
networks:
|
||
- archiv-net
|
||
healthcheck:
|
||
test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:3000/login >/dev/null 2>&1 || exit 1"]
|
||
interval: 15s
|
||
timeout: 5s
|
||
retries: 10
|
||
start_period: 20s
|