Files
familienarchiv/docker-compose.prod.yml
Marcel 46d1f5c6d8 chore(import): stop tracking real family PII canonical artifacts
The four files in tools/import-normalizer/out/ contain real names,
addresses, and attribution prose for ~163 living/deceased family members
and were committed by mistake. They are now removed from the index
(kept on disk for local development) and gitignored.

The canonical artifacts are produced locally from the Python normalizer
and synced into IMPORT_HOST_DIR out-of-band alongside the PDFs. The
contract between normalizer and importer is the header schema, not the
file contents — CanonicalSheetReader fails closed on a missing header,
which is what locks the contract.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-28 10:20:38 +02:00

308 lines
13 KiB
YAML

# Production / staging Docker Compose for Familienarchiv.
#
# This is a self-contained file (not an overlay over docker-compose.yml).
# All services for the prod stack live here. Environment isolation is
# achieved via the docker compose project name:
#
# production: docker compose -f docker-compose.prod.yml -p archiv-production ...
# staging: docker compose -f docker-compose.prod.yml -p archiv-staging --profile staging ...
#
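# Volumes and containers are namespaced by the project name, so the two
# environments cohabit on the same host. The network is the exception: it
# carries an explicit `name:` (COMPOSE_NETWORK_NAME, default archiv-net),
# which Compose uses as-is without the project prefix. Set
# COMPOSE_NETWORK_NAME per environment if the stacks must not share it.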
#
# Required env vars (provided by .env.production / .env.staging in CI):
# TAG image tag (release tag or "nightly")
# PORT_BACKEND, PORT_FRONTEND host-side ports (bound to 127.0.0.1 only)
# APP_DOMAIN e.g. archiv.raddatz.cloud / staging.raddatz.cloud
# POSTGRES_PASSWORD Postgres password
# MINIO_PASSWORD MinIO root password (admin operations only)
# MINIO_APP_PASSWORD MinIO application service-account password
# (least-privilege scope: archive bucket only)
# OCR_TRAINING_TOKEN token guarding ocr-service /train endpoint
# APP_ADMIN_USERNAME seeded admin email (e.g. admin@archiv.raddatz.cloud)
# APP_ADMIN_PASSWORD seeded admin password — CRITICAL: locked in on
# first deploy because UserDataInitializer only
# creates the account if the email does not exist
# MAIL_HOST, MAIL_PORT, SMTP relay (production only; staging uses mailpit)
# MAIL_USERNAME, MAIL_PASSWORD
# APP_MAIL_FROM sender address (e.g. noreply@raddatz.cloud)
# IMPORT_HOST_DIR absolute host path holding the canonical
# import artifacts (canonical-*.xlsx +
# canonical-persons-tree.json) and the
# <index>.pdf files for /admin/system
# import — mounted read-only at /import inside
# the backend. Compose refuses to start when
# this var is unset, so staging and prod cannot
# accidentally share an import source. Must be
# readable by the backend container's UID
# (currently root via the OpenJDK image — any
# world-readable directory works). Canonical
# artifacts are NOT in git (PII — ADR-025); ops
# syncs them in beside the PDFs out-of-band.
# Single user-defined bridge network that the long-running services attach to.
# NOTE: an explicit `name` is used by Compose verbatim (no project-name
# prefix), so two projects that both resolve it to the default `archiv-net`
# join the same network. Override COMPOSE_NETWORK_NAME per environment to
# keep them apart.
networks:
  archiv-net:
    name: ${COMPOSE_NETWORK_NAME:-archiv-net}
    driver: bridge
# Named volumes with default driver settings. Written as explicit empty
# mappings rather than bare keys so the "no options" intent is unambiguous.
volumes:
  postgres-data: {}  # PostgreSQL data directory (db)
  minio-data: {}     # MinIO object storage (minio)
  ocr-models: {}     # mounted at /app/models (ocr-service, ocr-volume-init)
  ocr-cache: {}      # mounted at /app/cache (ocr-service, ocr-volume-init)
# All containers of the stack. Credentials that the file header lists as
# required are interpolated with `${VAR:?message}` so that Compose aborts
# with a clear error when one is unset or empty, instead of silently passing
# an empty string into the container (same pattern as IMPORT_HOST_DIR below).
services:
  # --- PostgreSQL ---
  db:
    image: postgres:16-alpine
    restart: unless-stopped
    environment:
      POSTGRES_USER: archiv
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set}
      POSTGRES_DB: archiv
    volumes:
      - postgres-data:/var/lib/postgresql/data
    networks:
      - archiv-net
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U archiv -d archiv"]
      interval: 10s
      timeout: 5s
      retries: 5

  # --- MinIO object storage ---
  minio:
    # Pinned MinIO release for reproducible deploys. Bumped manually until
    # Renovate is bootstrapped for these production images (see follow-up issue).
    image: minio/minio:RELEASE.2025-02-28T09-55-16Z
    restart: unless-stopped
    command: server /data --console-address ":9001"
    environment:
      MINIO_ROOT_USER: archiv
      MINIO_ROOT_PASSWORD: ${MINIO_PASSWORD:?MINIO_PASSWORD must be set}
    volumes:
      - minio-data:/data
    networks:
      - archiv-net
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
      interval: 30s
      timeout: 20s
      retries: 3

  # Idempotent bucket bootstrap + service-account creation.
  # Runs once per `docker compose up` and exits 0. The entrypoint is
  # extracted to infra/minio/bootstrap.sh so the (non-trivial) idempotent
  # logic is readable, reviewable, and unit-testable as a script rather
  # than YAML-escaped shell.
  create-buckets:
    # Custom image bakes bootstrap.sh in at build time. A bind-mount fails on
    # the Docker-out-of-Docker production runner because the host daemon
    # resolves the relative path against the host filesystem, not the
    # runner container's CWD. See #506 + infra/minio/Dockerfile.
    build:
      context: ./infra/minio
    # Declare one-shot intent so `docker compose up -d --wait` treats
    # exited(0) as success rather than "not running, fail". Pair with
    # backend's `service_completed_successfully` dependency below. See #510.
    restart: "no"
    depends_on:
      minio:
        condition: service_healthy
    networks:
      - archiv-net
    environment:
      MINIO_PASSWORD: ${MINIO_PASSWORD:?MINIO_PASSWORD must be set}
      MINIO_APP_PASSWORD: ${MINIO_APP_PASSWORD:?MINIO_APP_PASSWORD must be set}

  # Dev-only mail catcher; gated behind the staging profile so production
  # never starts it. Staging workflow runs with `--profile staging`.
  mailpit:
    # Pinned for reproducibility; bumped manually until Renovate is bootstrapped.
    image: axllent/mailpit:v1.29.7
    restart: unless-stopped
    profiles: ["staging"]
    networks:
      - archiv-net
    healthcheck:
      # TCP-port open check via BusyBox `nc`. The previous wget-based probe
      # introduced a non-obvious binary dependency on the mailpit image; a
      # future tag that ships without wget would silently disable the
      # healthcheck. `nc` is part of BusyBox in the upstream image.
      test: ["CMD-SHELL", "nc -z localhost 8025 || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5

  # --- OCR: Volume bootstrap ---
  # Ensures correct ownership and directory structure on ocr-cache / ocr-models
  # before ocr-service starts. Handles pre-existing volumes (including those
  # created before the non-root ocr user was introduced in commit 1aca4c4a)
  # and guarantees /app/cache/.tmp exists for TMPDIR staging. See ADR-021.
  ocr-volume-init:
    image: alpine:3.21
    # 1000:1000 is presumably the UID/GID of the non-root ocr user in the
    # ocr-service image; confirm against ocr-service/Dockerfile.
    command:
      - sh
      - -c
      - "chown -R 1000:1000 /app/cache /app/models && mkdir -p /app/cache/.tmp && chown 1000:1000 /app/cache/.tmp"
    volumes:
      - ocr-models:/app/models
      - ocr-cache:/app/cache
    # This one-shot only touches the mounted volumes and needs no network.
    # State that explicitly instead of relying on how an empty `networks: []`
    # list is interpreted (a service without networks may be attached to the
    # implicit default network).
    network_mode: "none"
    restart: "no"

  # --- OCR service ---
  ocr-service:
    build:
      context: ./ocr-service
    restart: unless-stopped
    expose:
      - "8000"
    # Surya OCR loads ~5GB of transformer models at startup; first request
    # triggers a further ~1GB Kraken model download into ocr-cache.
    # CX42+ (16 GB RAM) honours the default. On a CX32 (8 GB) override with
    # OCR_MEM_LIMIT=6g (slower first-request, fits the host).
    # memswap_limit equals mem_limit, so the container gets no extra swap.
    mem_limit: ${OCR_MEM_LIMIT:-12g}
    memswap_limit: ${OCR_MEM_LIMIT:-12g}
    volumes:
      - ocr-models:/app/models
      - ocr-cache:/app/cache  # HuggingFace / ketos cache — prevents re-downloads on recreate (HF_HOME)
    environment:
      HF_HOME: /app/cache
      XDG_CACHE_HOME: /app/cache
      TORCH_HOME: /app/models/torch
      # Stage GB-scale Surya model downloads on SSD, not the 512 MB RAM tmpfs.
      # /tmp keeps its small DoS cap; training ZIPs still unpack under /tmp
      # but ZIP Slip protection (_validate_zip_entry) is unchanged. See ADR-021.
      TMPDIR: /app/cache/.tmp
      KRAKEN_MODEL_PATH: /app/models/german_kurrent.mlmodel
      # Guards the /train endpoint; must match APP_OCR_TRAINING_TOKEN on the backend.
      TRAINING_TOKEN: ${OCR_TRAINING_TOKEN:?OCR_TRAINING_TOKEN must be set}
      OCR_CONFIDENCE_THRESHOLD: "0.3"
      OCR_CONFIDENCE_THRESHOLD_KURRENT: "0.5"
      # SSRF allowlist pinned explicitly to the internal MinIO hostname.
      # In prod the OCR service only fetches PDFs from MinIO over the
      # docker network; localhost/127.0.0.1 are dev-only sources and
      # must NOT be reachable here. Do not widen to `*`.
      ALLOWED_PDF_HOSTS: "minio"
    networks:
      - archiv-net
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 10s
      timeout: 5s
      retries: 12
      # Generous grace period: model loading at startup takes a while.
      start_period: 120s
    depends_on:
      ocr-volume-init:
        condition: service_completed_successfully
    # Hardening: immutable root filesystem, no capabilities, no privilege
    # escalation. Writable paths are the two volumes above plus the tmpfs.
    read_only: true
    tmpfs:
      # Training-ZIP unzip + transient PDF buffers only (small, RAM-friendly).
      # GB-scale model downloads go to TMPDIR=/app/cache/.tmp instead. See ADR-021.
      - /tmp:size=512m
    cap_drop:
      - ALL
    security_opt:
      - no-new-privileges:true

  # --- Backend ---
  backend:
    image: familienarchiv/backend:${TAG:-nightly}
    build:
      context: ./backend
    restart: unless-stopped
    depends_on:
      db:
        condition: service_healthy
      minio:
        condition: service_healthy
      ocr-service:
        condition: service_healthy
      # Gate startup on the bucket bootstrap. Without this, backend
      # starts in parallel with create-buckets and may race the policy
      # bind. Also tells compose's `up -d --wait` that create-buckets
      # is a one-shot that must complete successfully. See #510.
      create-buckets:
        condition: service_completed_successfully
    # Bound to localhost only — Caddy fronts external traffic.
    ports:
      - "127.0.0.1:${PORT_BACKEND}:8080"
    # Host path holding the canonical import artifacts (canonical-*.xlsx +
    # canonical-persons-tree.json) + <index>.pdf files for the import endpoint.
    # Read-only; the canonical importer only reads them from /import.
    # Required — no default — so staging and prod cannot accidentally share an
    # import source. CI workflows pin this per-env (see .gitea/workflows/).
    # NOTE: the canonical artifacts are NOT version-controlled (they contain real
    # family PII — see ADR-025). Ops must produce them locally from the Python
    # normalizer (tools/import-normalizer/) and sync them into this host path
    # alongside the <index>.pdf corpus before triggering an import.
    volumes:
      - "${IMPORT_HOST_DIR:?Set IMPORT_HOST_DIR to a host path holding the import payload (canonical artifacts + <index>.pdf files). See docs/DEPLOYMENT.md.}:/import:ro"
    environment:
      SPRING_DATASOURCE_URL: jdbc:postgresql://db:5432/archiv
      SPRING_DATASOURCE_USERNAME: archiv
      SPRING_DATASOURCE_PASSWORD: ${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set}
      # Consumed by Flyway V68 via the ${grafanaDbPassword} placeholder to set
      # the read-only grafana_reader role's password.
      # NOTE(review): not listed among the required env vars in the file
      # header and has no default, so it is empty when unset — confirm whether
      # it should be required.
      GRAFANA_DB_PASSWORD: ${GRAFANA_DB_PASSWORD}
      # Application uses the bucket-scoped service account, not MinIO root.
      S3_ENDPOINT: http://minio:9000
      S3_ACCESS_KEY: archiv-app
      S3_SECRET_KEY: ${MINIO_APP_PASSWORD:?MINIO_APP_PASSWORD must be set}
      S3_BUCKET_NAME: familienarchiv
      S3_REGION: us-east-1
      # No SPRING_PROFILES_ACTIVE — base application.yaml is production-ready
      # (Swagger disabled, show-sql off, open-in-view false).
      APP_BASE_URL: https://${APP_DOMAIN}
      # Seeded admin account; the password is locked in on first deploy, so an
      # empty value must never reach the container.
      APP_ADMIN_USERNAME: ${APP_ADMIN_USERNAME:?APP_ADMIN_USERNAME must be set}
      APP_ADMIN_PASSWORD: ${APP_ADMIN_PASSWORD:?APP_ADMIN_PASSWORD must be set}
      APP_OCR_BASE_URL: http://ocr-service:8000
      APP_OCR_TRAINING_TOKEN: ${OCR_TRAINING_TOKEN:?OCR_TRAINING_TOKEN must be set}
      MAIL_HOST: ${MAIL_HOST}
      MAIL_PORT: ${MAIL_PORT:-587}
      MAIL_USERNAME: ${MAIL_USERNAME:-}
      MAIL_PASSWORD: ${MAIL_PASSWORD:-}
      APP_MAIL_FROM: ${APP_MAIL_FROM:-noreply@raddatz.cloud}
      SPRING_MAIL_PROPERTIES_MAIL_SMTP_AUTH: ${MAIL_SMTP_AUTH:-true}
      SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-true}
      # NOTE(review): no `tempo` service is defined in this file — presumably
      # it is provided by a separate observability stack attached to the same
      # network; confirm.
      OTEL_EXPORTER_OTLP_ENDPOINT: http://tempo:4318
      OTEL_LOGS_EXPORTER: none
      OTEL_METRICS_EXPORTER: none
      MANAGEMENT_METRICS_TAGS_APPLICATION: Familienarchiv
      MANAGEMENT_TRACING_SAMPLING_PROBABILITY: ${MANAGEMENT_TRACING_SAMPLING_PROBABILITY:-0.1}
      SENTRY_DSN: ${SENTRY_DSN:-}
      LOGGING_STRUCTURED_FORMAT_CONSOLE: ecs
    networks:
      - archiv-net
    healthcheck:
      # Probes port 8081, not the 8080 application port published above —
      # presumably the actuator management port; confirm against application.yaml.
      test: ["CMD-SHELL", "wget -qO- http://localhost:8081/actuator/health | grep -q UP || exit 1"]
      interval: 15s
      timeout: 5s
      retries: 10
      start_period: 30s

  # --- Frontend ---
  frontend:
    image: familienarchiv/frontend:${TAG:-nightly}
    build:
      context: ./frontend
      target: production
      args:
        # Vite build-time variable — baked into the JS bundle at build time.
        # Empty default so deploys succeed before the secret is configured.
        VITE_SENTRY_DSN: ${VITE_SENTRY_DSN:-}
    restart: unless-stopped
    depends_on:
      backend:
        condition: service_healthy
    # Bound to localhost only, like the backend port.
    ports:
      - "127.0.0.1:${PORT_FRONTEND}:3000"
    environment:
      # SSR fetches go inside the docker network; clients hit https://${APP_DOMAIN}
      API_INTERNAL_URL: http://backend:8080
      ORIGIN: https://${APP_DOMAIN}
      # Enforce upload size limit in the adapter-node layer (fixes GHSA-2crg-3p73-43xp bypass).
      # Must be ≤ the request-body limit enforced by the Caddy reverse proxy
      # (Caddy's `request_body` / `max_size`; `client_max_body_size` is the
      # nginx name for the same idea) to avoid 413 mismatches.
      BODY_SIZE_LIMIT: 50M
    networks:
      - archiv-net
    healthcheck:
      test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:3000/login >/dev/null 2>&1 || exit 1"]
      interval: 15s
      timeout: 5s
      retries: 10
      start_period: 20s