From b665e1132d2a5d86131e151598c3df33035b2484 Mon Sep 17 00:00:00 2001
From: Marcel
Date: Sat, 6 Jun 2026 19:20:22 +0200
Subject: [PATCH] fix(infra): deploy Ollama to prod/staging compose + fix broken model-init recipe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

NL search returned 503 (SMART_SEARCH_UNAVAILABLE / "Intelligente Suche
nicht verfügbar") on staging because Ollama was never reachable. Two
defects, both downstream of #737:

1. Ollama was added only to the dev docker-compose.yml. Staging/prod
   deploy from the self-contained docker-compose.prod.yml, which had no
   ollama service — so the backend (defaulting to http://ollama:11434)
   hit a non-existent host (ResourceAccessException -> 503).
2. The merged model-init recipe never worked: the ollama/ollama image
   ENTRYPOINT is `ollama` (so `command: sh -c ...` ran as
   `ollama sh ...` -> "unknown command sh"), and the image ships no curl
   (so both the readiness loop and the healthcheck could never pass).

- docker-compose.prod.yml: add ollama-model-init + ollama services and
  the ollama-models volume, with the corrected recipe (entrypoint
  override to /bin/sh -c, `ollama list` for readiness and healthcheck).
- docker-compose.yml: fix the same broken entrypoint/command and the
  curl healthcheck so the dev stack actually starts Ollama.

Verified on staging end-to-end: model-init exits 0, ollama healthy,
backend reaches /api/tags, inference succeeds within the 8g limit.

Refs #758

Co-Authored-By: Claude Opus 4.8
---
 docker-compose.prod.yml | 59 +++++++++++++++++++++++++++++++++++++++++
 docker-compose.yml      | 11 +++++---
 2 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index 26e07442..9aa8f80c 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -50,6 +50,7 @@ volumes:
   minio-data:
   ocr-models:
   ocr-cache:
+  ollama-models:
 
 services:
   db:
@@ -200,6 +201,64 @@ services:
     security_opt:
       - no-new-privileges:true
 
+  # --- Ollama: Model init (one-shot pull) ---
+  # Pulls qwen2.5:7b-instruct-q4_K_M (~4.7 GB) into the ollama-models volume on
+  # first start; exits quickly on subsequent starts (model already cached).
+  # The ollama/ollama image's ENTRYPOINT is `ollama` and the image ships WITHOUT
+  # curl, so the entrypoint is a shell and readiness is polled with `ollama list`
+  # (max 120 attempts, then exit 1). Backend degrades gracefully (503) without Ollama.
+  ollama-model-init:
+    image: ollama/ollama:0.30.6
+    restart: "no"
+    entrypoint: ["/bin/sh", "-c"]
+    command:
+      - "ollama serve & tries=0; until ollama list >/dev/null 2>&1; do tries=$$((tries+1)); if [ $$tries -ge 120 ]; then echo 'ollama serve not ready after 120 attempts' >&2; exit 1; fi; sleep 1; done && ollama pull qwen2.5:7b-instruct-q4_K_M"
+    networks:
+      - archiv-net
+    volumes:
+      - ollama-models:/root/.ollama
+    mem_limit: 2g
+    read_only: true
+    tmpfs:
+      - /tmp:size=512m
+    cap_drop:
+      - ALL
+    security_opt:
+      - no-new-privileges:true
+
+  # --- Ollama: LLM inference server ---
+  # Serves the pre-pulled model for NL search inference. Backend reaches it at
+  # http://ollama:11434 (application.yaml default; no env override required).
+  # Healthcheck uses `ollama list` because the image has no curl.
+  ollama:
+    image: ollama/ollama:0.30.6
+    restart: unless-stopped
+    expose:
+      - "11434"
+    networks:
+      - archiv-net
+    volumes:
+      - ollama-models:/root/.ollama
+    cpus: "${OLLAMA_CPU_LIMIT:-4.0}"
+    mem_limit: "${OLLAMA_MEM_LIMIT:-8g}"
+    memswap_limit: "${OLLAMA_MEM_LIMIT:-8g}"
+    read_only: true
+    tmpfs:
+      - /tmp:size=512m
+    cap_drop:
+      - ALL
+    security_opt:
+      - no-new-privileges:true
+    healthcheck:
+      test: ["CMD", "ollama", "list"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 60s
+    depends_on:
+      ollama-model-init:
+        condition: service_completed_successfully
+
   backend:
     image: familienarchiv/backend:${TAG:-nightly}
     build:
diff --git a/docker-compose.yml b/docker-compose.yml
index 78ac969a..bd54432f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -161,8 +161,11 @@ services:
       - ALL
     security_opt:
       - no-new-privileges:true
-    command: >
-      sh -c "ollama serve & SERVE_PID=$$! && until curl -sf http://localhost:11434/api/tags; do sleep 1; done && ollama pull qwen2.5:7b-instruct-q4_K_M && kill $$SERVE_PID"
+    # The image ENTRYPOINT is `ollama` and it ships no curl: override the entrypoint
+    # to a shell and poll `ollama list`, failing after 120 attempts instead of hanging.
+    entrypoint: ["/bin/sh", "-c"]
+    command:
+      - "ollama serve & tries=0; until ollama list >/dev/null 2>&1; do tries=$$((tries+1)); if [ $$tries -ge 120 ]; then echo 'ollama serve not ready after 120 attempts' >&2; exit 1; fi; sleep 1; done && ollama pull qwen2.5:7b-instruct-q4_K_M"
 
   # --- Ollama: LLM inference server ---
   # Serves the pre-pulled model for NL search inference.
@@ -191,7 +194,9 @@ services:
     security_opt:
       - no-new-privileges:true
     healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
+      # `ollama list` hits the local API and exits non-zero if the server is
+      # down — used instead of curl, which the image does not ship.
+      test: ["CMD", "ollama", "list"]
       interval: 30s
       timeout: 10s
       retries: 5