fix(infra): deploy Ollama to prod/staging compose + fix broken model-init recipe
All checks were successful
CI / Unit & Component Tests (pull_request) Successful in 4m0s
CI / OCR Service Tests (pull_request) Successful in 25s
CI / Backend Unit Tests (pull_request) Successful in 3m56s
CI / fail2ban Regex (pull_request) Successful in 45s
CI / Semgrep Security Scan (pull_request) Successful in 23s
CI / Compose Bucket Idempotency (pull_request) Successful in 1m5s
All checks were successful
CI / Unit & Component Tests (pull_request) Successful in 4m0s
CI / OCR Service Tests (pull_request) Successful in 25s
CI / Backend Unit Tests (pull_request) Successful in 3m56s
CI / fail2ban Regex (pull_request) Successful in 45s
CI / Semgrep Security Scan (pull_request) Successful in 23s
CI / Compose Bucket Idempotency (pull_request) Successful in 1m5s
NL search returned 503 (SMART_SEARCH_UNAVAILABLE / "Intelligente Suche nicht verfügbar") on staging because Ollama was never reachable. Two defects, both downstream of #737:

1. Ollama was added only to the dev docker-compose.yml. Staging/prod deploy from the self-contained docker-compose.prod.yml, which had no ollama service — so the backend (defaulting to http://ollama:11434) hit a non-existent host (ResourceAccessException -> 503).
2. The merged model-init recipe never worked: the ollama/ollama image ENTRYPOINT is `ollama` (so `command: sh -c ...` ran as `ollama sh ...` -> "unknown command sh"), and the image ships no curl (so both the readiness loop and the healthcheck could never pass).

- docker-compose.prod.yml: add ollama-model-init + ollama services and the ollama-models volume, with the corrected recipe (entrypoint override to /bin/sh -c, `ollama list` for readiness and healthcheck).
- docker-compose.yml: fix the same broken entrypoint/command and the curl healthcheck so the dev stack actually starts Ollama.

Verified on staging end-to-end: model-init exits 0, ollama healthy, backend reaches /api/tags, inference succeeds within the 8g limit.

Refs #758

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -50,6 +50,7 @@ volumes:
|
||||
minio-data:
|
||||
ocr-models:
|
||||
ocr-cache:
|
||||
ollama-models:
|
||||
|
||||
services:
|
||||
db:
|
||||
@@ -200,6 +201,64 @@ services:
|
||||
security_opt:
|
||||
- no-new-privileges:true
|
||||
|
||||
# --- Ollama: Model init (one-shot pull) ---
# Pulls qwen2.5:7b-instruct-q4_K_M (~4.7 GB) into the ollama-models volume on
# first start; exits quickly on subsequent starts (model already cached).
# The ollama/ollama image's ENTRYPOINT is `ollama` and the image ships WITHOUT
# curl, so the entrypoint is overridden to a shell and readiness is probed with
# `ollama list` (not curl). Backend degrades gracefully (503) if Ollama is absent.
#
# The readiness wait is bounded (60 attempts, 1 s apart). Without a bound, a
# temporary `ollama serve` that fails to start would leave this container
# looping forever, and the `ollama` service — which waits for this one to
# complete successfully — would never start and never report an error.
ollama-model-init:
  image: ollama/ollama:0.30.6
  restart: "no"
  entrypoint: ["/bin/sh", "-c"]
  command:
    # `$$` is Compose's escape for a literal `$`; the shell receives `$`.
    # The script's exit status is that of `ollama pull` (or 1 on timeout).
    # The background server ends with the container when the shell exits.
    - |
      ollama serve &
      tries=0
      until ollama list >/dev/null 2>&1; do
        tries=$$((tries + 1))
        if [ "$$tries" -ge 60 ]; then
          echo "ollama-model-init: ollama serve not ready after 60 attempts" >&2
          exit 1
        fi
        sleep 1
      done
      ollama pull qwen2.5:7b-instruct-q4_K_M
  networks:
    - archiv-net
  volumes:
    - ollama-models:/root/.ollama
  mem_limit: 2g
  read_only: true
  tmpfs:
    - /tmp:size=512m
  cap_drop:
    - ALL
  security_opt:
    - no-new-privileges:true
|
||||
|
||||
# --- Ollama: LLM inference server ---
# Long-running server that answers NL search inference requests using the
# model already pulled into the shared ollama-models volume. The backend talks
# to it at http://ollama:11434 (application.yaml default; no env override
# required). The image has no curl, so health is probed with `ollama list`.
ollama:
  image: ollama/ollama:0.30.6
  restart: unless-stopped

  # Start only after the one-shot model pull has exited with status 0.
  depends_on:
    ollama-model-init:
      condition: service_completed_successfully

  # Reachable on the internal network only; no host port is published.
  networks:
    - archiv-net
  expose:
    - "11434"

  # Same named volume the init container pulls the model into.
  volumes:
    - ollama-models:/root/.ollama

  # Resource ceilings, overridable via environment. memswap_limit reuses the
  # memory variable, so both limits always carry the same value.
  cpus: "${OLLAMA_CPU_LIMIT:-4.0}"
  mem_limit: "${OLLAMA_MEM_LIMIT:-8g}"
  memswap_limit: "${OLLAMA_MEM_LIMIT:-8g}"

  # Hardening: read-only root filesystem with a tmpfs /tmp, all capabilities
  # dropped, no privilege escalation.
  read_only: true
  tmpfs:
    - /tmp:size=512m
  cap_drop:
    - ALL
  security_opt:
    - no-new-privileges:true

  healthcheck:
    test:
      - CMD
      - ollama
      - list
    interval: 30s
    timeout: 10s
    retries: 5
    start_period: 60s
|
||||
|
||||
backend:
|
||||
image: familienarchiv/backend:${TAG:-nightly}
|
||||
build:
|
||||
|
||||
@@ -161,8 +161,11 @@ services:
|
||||
- ALL
|
||||
security_opt:
|
||||
- no-new-privileges:true
|
||||
command: >
|
||||
sh -c "ollama serve & SERVE_PID=$$! && until curl -sf http://localhost:11434/api/tags; do sleep 1; done && ollama pull qwen2.5:7b-instruct-q4_K_M && kill $$SERVE_PID"
|
||||
# The image ENTRYPOINT is `ollama`, so override it to a shell; the image has
|
||||
# no curl, so readiness is probed with `ollama list` instead of a curl loop.
|
||||
entrypoint: ["/bin/sh", "-c"]
|
||||
command:
|
||||
- "ollama serve & until ollama list >/dev/null 2>&1; do sleep 1; done && ollama pull qwen2.5:7b-instruct-q4_K_M"
|
||||
|
||||
# --- Ollama: LLM inference server ---
|
||||
# Serves the pre-pulled model for NL search inference.
|
||||
@@ -191,7 +194,9 @@ services:
|
||||
security_opt:
|
||||
- no-new-privileges:true
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
|
||||
# `ollama list` hits the local API and exits non-zero if the server is
|
||||
# down — used instead of curl, which the image does not ship.
|
||||
test: ["CMD", "ollama", "list"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 5
|
||||
|
||||
Reference in New Issue
Block a user