Some checks failed
CI / Unit & Component Tests (push) Failing after 2m47s
CI / OCR Service Tests (push) Successful in 17s
CI / Backend Unit Tests (push) Successful in 4m12s
CI / fail2ban Regex (push) Successful in 37s
CI / Compose Bucket Idempotency (push) Successful in 56s
CI / Unit & Component Tests (pull_request) Failing after 2m49s
CI / OCR Service Tests (pull_request) Successful in 16s
CI / Backend Unit Tests (pull_request) Successful in 4m13s
CI / fail2ban Regex (pull_request) Successful in 38s
CI / Compose Bucket Idempotency (pull_request) Successful in 58s
Closes #510. `docker compose up -d --wait` exits 1 even when every service is healthy because the one-shot `create-buckets` exits 0 and --wait expects "running". The whole stack came up fine on staging, but the workflow gate failed before the smoke step could run. Two changes: 1. create-buckets: `restart: "no"` declares one-shot intent. 2. backend.depends_on: add `create-buckets: service_completed_successfully`. With both, compose v2.20+ understands create-buckets is a one-shot that must complete successfully, and --wait treats exited(0) as the target state. Backend startup now also correctly gates on bucket bootstrap (closes a latent race where backend could start before the archiv-app policy was bound). Verified `docker compose config --quiet` parses and the resolved config shows the right dependency graph. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
232 lines
8.6 KiB
YAML
232 lines
8.6 KiB
YAML
# Production / staging Docker Compose for Familienarchiv.
#
# This is a self-contained file (not an overlay over docker-compose.yml).
# All services for the prod stack live here. Environment isolation is
# achieved via the docker compose project name:
#
#   production: docker compose -f docker-compose.prod.yml -p archiv-production ...
#   staging:    docker compose -f docker-compose.prod.yml -p archiv-staging --profile staging ...
#
# Volumes, networks and containers are namespaced by the project name,
# so the two environments cohabit cleanly on the same host.
#
# Required env vars (provided by .env.production / .env.staging in CI):
#   TAG                          image tag (release tag or "nightly")
#   PORT_BACKEND, PORT_FRONTEND  host-side ports (bound to 127.0.0.1 only)
#   APP_DOMAIN                   e.g. archiv.raddatz.cloud / staging.raddatz.cloud
#   POSTGRES_PASSWORD            Postgres password
#   MINIO_PASSWORD               MinIO root password (admin operations only)
#   MINIO_APP_PASSWORD           MinIO application service-account password
#                                (least-privilege scope: archive bucket only)
#   OCR_TRAINING_TOKEN           token guarding ocr-service /train endpoint
#   APP_ADMIN_USERNAME           seeded admin email (e.g. admin@archiv.raddatz.cloud)
#   APP_ADMIN_PASSWORD           seeded admin password — CRITICAL: locked in on
#                                first deploy because UserDataInitializer only
#                                creates the account if the email does not exist
#   MAIL_HOST, MAIL_PORT,        SMTP relay (production only; staging uses mailpit)
#   MAIL_USERNAME, MAIL_PASSWORD
#   APP_MAIL_FROM                sender address (e.g. noreply@raddatz.cloud)

# Single bridge network; all services talk over it by service name.
networks:
  archiv-net:
    driver: bridge

# Named volumes — namespaced per compose project, so production and
# staging data never collide on the shared host.
volumes:
  postgres-data:
  minio-data:
  ocr-models:
  ocr-cache:

services:
  # PostgreSQL — primary relational store for the backend.
  db:
    image: postgres:16-alpine
    restart: unless-stopped
    environment:
      POSTGRES_USER: archiv
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: archiv
    volumes:
      - postgres-data:/var/lib/postgresql/data
    networks:
      - archiv-net
    healthcheck:
      # pg_isready confirms the server accepts connections for the archiv DB;
      # backend's depends_on gates on this (condition: service_healthy).
      test: ["CMD-SHELL", "pg_isready -U archiv -d archiv"]
      interval: 10s
      timeout: 5s
      retries: 5

minio:
|
|
# Pinned MinIO release for reproducible deploys. Bumped manually until
|
|
# Renovate is bootstrapped for these production images (see follow-up issue).
|
|
image: minio/minio:RELEASE.2025-02-28T09-55-16Z
|
|
restart: unless-stopped
|
|
command: server /data --console-address ":9001"
|
|
environment:
|
|
MINIO_ROOT_USER: archiv
|
|
MINIO_ROOT_PASSWORD: ${MINIO_PASSWORD}
|
|
volumes:
|
|
- minio-data:/data
|
|
networks:
|
|
- archiv-net
|
|
healthcheck:
|
|
test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
|
|
interval: 30s
|
|
timeout: 20s
|
|
retries: 3
|
|
|
|
# Idempotent bucket bootstrap + service-account creation.
|
|
# Runs once per `docker compose up` and exits 0. The entrypoint is
|
|
# extracted to infra/minio/bootstrap.sh so the (non-trivial) idempotent
|
|
# logic is readable, reviewable, and unit-testable as a script rather
|
|
# than YAML-escaped shell.
|
|
create-buckets:
|
|
# Custom image bakes bootstrap.sh in at build time. A bind-mount fails on
|
|
# the Docker-out-of-Docker production runner because the host daemon
|
|
# resolves the relative path against the host filesystem, not the
|
|
# runner container's CWD. See #506 + infra/minio/Dockerfile.
|
|
build:
|
|
context: ./infra/minio
|
|
# Declare one-shot intent so `docker compose up -d --wait` treats
|
|
# exited(0) as success rather than "not running, fail". Pair with
|
|
# backend's `service_completed_successfully` dependency below. See #510.
|
|
restart: "no"
|
|
depends_on:
|
|
minio:
|
|
condition: service_healthy
|
|
networks:
|
|
- archiv-net
|
|
environment:
|
|
MINIO_PASSWORD: ${MINIO_PASSWORD}
|
|
MINIO_APP_PASSWORD: ${MINIO_APP_PASSWORD}
|
|
|
|
# Dev-only mail catcher; gated behind the staging profile so production
|
|
# never starts it. Staging workflow runs with `--profile staging`.
|
|
mailpit:
|
|
# Pinned for reproducibility; bumped manually until Renovate is bootstrapped.
|
|
image: axllent/mailpit:v1.29.7
|
|
restart: unless-stopped
|
|
profiles: ["staging"]
|
|
networks:
|
|
- archiv-net
|
|
healthcheck:
|
|
# TCP-port open check via BusyBox `nc`. The previous wget-based probe
|
|
# introduced a non-obvious binary dependency on the mailpit image; a
|
|
# future tag that ships without wget would silently disable the
|
|
# healthcheck. `nc` is part of BusyBox in the upstream image.
|
|
test: ["CMD-SHELL", "nc -z localhost 8025 || exit 1"]
|
|
interval: 10s
|
|
timeout: 5s
|
|
retries: 5
|
|
|
|
ocr-service:
|
|
build:
|
|
context: ./ocr-service
|
|
restart: unless-stopped
|
|
expose:
|
|
- "8000"
|
|
# Surya OCR loads ~5GB of transformer models at startup; first request
|
|
# triggers a further ~1GB Kraken model download into ocr-cache.
|
|
# CX42+ (16 GB RAM) honours the default. On a CX32 (8 GB) override with
|
|
# OCR_MEM_LIMIT=6g (slower first-request, fits the host).
|
|
mem_limit: ${OCR_MEM_LIMIT:-12g}
|
|
memswap_limit: ${OCR_MEM_LIMIT:-12g}
|
|
volumes:
|
|
- ocr-models:/app/models
|
|
- ocr-cache:/root/.cache
|
|
environment:
|
|
KRAKEN_MODEL_PATH: /app/models/german_kurrent.mlmodel
|
|
TRAINING_TOKEN: ${OCR_TRAINING_TOKEN}
|
|
OCR_CONFIDENCE_THRESHOLD: "0.3"
|
|
OCR_CONFIDENCE_THRESHOLD_KURRENT: "0.5"
|
|
# SSRF allowlist pinned explicitly to the internal MinIO hostname.
|
|
# In prod the OCR service only fetches PDFs from MinIO over the
|
|
# docker network; localhost/127.0.0.1 are dev-only sources and
|
|
# must NOT be reachable here. Do not widen to `*`.
|
|
ALLOWED_PDF_HOSTS: "minio"
|
|
networks:
|
|
- archiv-net
|
|
healthcheck:
|
|
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
|
|
interval: 10s
|
|
timeout: 5s
|
|
retries: 12
|
|
start_period: 120s
|
|
|
|
backend:
|
|
image: familienarchiv/backend:${TAG:-nightly}
|
|
build:
|
|
context: ./backend
|
|
restart: unless-stopped
|
|
depends_on:
|
|
db:
|
|
condition: service_healthy
|
|
minio:
|
|
condition: service_healthy
|
|
ocr-service:
|
|
condition: service_healthy
|
|
# Gate startup on the bucket bootstrap. Without this, backend
|
|
# starts in parallel with create-buckets and may race the policy
|
|
# bind. Also tells compose's `up -d --wait` that create-buckets
|
|
# is a one-shot that must complete successfully. See #510.
|
|
create-buckets:
|
|
condition: service_completed_successfully
|
|
# Bound to localhost only — Caddy fronts external traffic.
|
|
ports:
|
|
- "127.0.0.1:${PORT_BACKEND}:8080"
|
|
environment:
|
|
SPRING_DATASOURCE_URL: jdbc:postgresql://db:5432/archiv
|
|
SPRING_DATASOURCE_USERNAME: archiv
|
|
SPRING_DATASOURCE_PASSWORD: ${POSTGRES_PASSWORD}
|
|
# Application uses the bucket-scoped service account, not MinIO root.
|
|
S3_ENDPOINT: http://minio:9000
|
|
S3_ACCESS_KEY: archiv-app
|
|
S3_SECRET_KEY: ${MINIO_APP_PASSWORD}
|
|
S3_BUCKET_NAME: familienarchiv
|
|
S3_REGION: us-east-1
|
|
# No SPRING_PROFILES_ACTIVE — base application.yaml is production-ready
|
|
# (Swagger disabled, show-sql off, open-in-view false).
|
|
APP_BASE_URL: https://${APP_DOMAIN}
|
|
APP_ADMIN_USERNAME: ${APP_ADMIN_USERNAME}
|
|
APP_ADMIN_PASSWORD: ${APP_ADMIN_PASSWORD}
|
|
APP_OCR_BASE_URL: http://ocr-service:8000
|
|
APP_OCR_TRAINING_TOKEN: ${OCR_TRAINING_TOKEN}
|
|
MAIL_HOST: ${MAIL_HOST}
|
|
MAIL_PORT: ${MAIL_PORT:-587}
|
|
MAIL_USERNAME: ${MAIL_USERNAME:-}
|
|
MAIL_PASSWORD: ${MAIL_PASSWORD:-}
|
|
APP_MAIL_FROM: ${APP_MAIL_FROM:-noreply@raddatz.cloud}
|
|
SPRING_MAIL_PROPERTIES_MAIL_SMTP_AUTH: ${MAIL_SMTP_AUTH:-true}
|
|
SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-true}
|
|
networks:
|
|
- archiv-net
|
|
healthcheck:
|
|
test: ["CMD-SHELL", "wget -qO- http://localhost:8080/actuator/health | grep -q UP || exit 1"]
|
|
interval: 15s
|
|
timeout: 5s
|
|
retries: 10
|
|
start_period: 30s
|
|
|
|
frontend:
|
|
image: familienarchiv/frontend:${TAG:-nightly}
|
|
build:
|
|
context: ./frontend
|
|
target: production
|
|
restart: unless-stopped
|
|
depends_on:
|
|
backend:
|
|
condition: service_healthy
|
|
ports:
|
|
- "127.0.0.1:${PORT_FRONTEND}:3000"
|
|
environment:
|
|
# SSR fetches go inside the docker network; clients hit https://${APP_DOMAIN}
|
|
API_INTERNAL_URL: http://backend:8080
|
|
ORIGIN: https://${APP_DOMAIN}
|
|
networks:
|
|
- archiv-net
|
|
healthcheck:
|
|
test: ["CMD-SHELL", "wget -qO- http://localhost:3000/login >/dev/null 2>&1 || exit 1"]
|
|
interval: 15s
|
|
timeout: 5s
|
|
retries: 10
|
|
start_period: 20s
|