fix(infra): deploy Ollama to prod/staging compose + fix broken model-init recipe #759

Merged
marcel merged 8 commits from fix/issue-758-ollama-prod-compose into main 2026-06-06 20:30:35 +02:00
2 changed files with 67 additions and 3 deletions
Showing only changes of commit b665e1132d - Show all commits

View File

@@ -50,6 +50,7 @@ volumes:
minio-data:
ocr-models:
ocr-cache:
ollama-models:
services:
db:
@@ -200,6 +201,64 @@ services:
security_opt:
- no-new-privileges:true
# --- Ollama: Model init (one-shot pull) ---
# Pulls qwen2.5:7b-instruct-q4_K_M (~4.7 GB) into the ollama-models volume on
# first start; exits quickly on subsequent starts (model already cached).
# The ollama/ollama image's ENTRYPOINT is `ollama` and the image ships WITHOUT
# curl, so the entrypoint is overridden to a shell and readiness is probed with
# `ollama list` (not curl). Backend degrades gracefully (503) if Ollama is absent.
# The readiness wait is bounded: if the temporary `ollama serve` never answers,
# this container exits non-zero instead of looping forever, so anything gated
# on `service_completed_successfully` fails visibly rather than hanging.
ollama-model-init:
  image: ollama/ollama:0.30.6
  restart: "no"
  entrypoint: ["/bin/sh", "-c"]
  # The script is passed as the single argument to `/bin/sh -c`.
  # `$$` is Compose's escape for a literal `$`, so the shell sees `$((...))`
  # and `$attempts` rather than Compose trying to interpolate them.
  command:
    - |
      ollama serve &
      attempts=0
      until ollama list >/dev/null 2>&1; do
        attempts=$$((attempts + 1))
        if [ "$$attempts" -ge 120 ]; then
          echo "ollama serve not ready after 120 attempts; aborting model pull" >&2
          exit 1
        fi
        sleep 1
      done
      ollama pull qwen2.5:7b-instruct-q4_K_M
  networks:
    - archiv-net
  volumes:
    # Shared model store; the only writable path besides /tmp (root fs is read-only).
    - ollama-models:/root/.ollama
  mem_limit: 2g
  read_only: true
  tmpfs:
    - /tmp:size=512m
  cap_drop:
    - ALL
  security_opt:
    - no-new-privileges:true
# --- Ollama: LLM inference server ---
# Long-running server that answers NL search inference requests using the
# model already pulled into the ollama-models volume. The backend reaches it
# at http://ollama:11434 (application.yaml default; no env override required).
# The image ships without curl, so the healthcheck runs `ollama list` instead.
ollama:
  image: ollama/ollama:0.30.6
  restart: unless-stopped
  # Start only after the one-shot model pull has exited successfully.
  depends_on:
    ollama-model-init:
      condition: service_completed_successfully
  networks: [archiv-net]
  expose: ["11434"]
  volumes:
    - ollama-models:/root/.ollama
  # Resource ceilings; overridable via OLLAMA_CPU_LIMIT / OLLAMA_MEM_LIMIT.
  # memswap_limit equals mem_limit, so no additional swap is granted.
  cpus: "${OLLAMA_CPU_LIMIT:-4.0}"
  mem_limit: "${OLLAMA_MEM_LIMIT:-8g}"
  memswap_limit: "${OLLAMA_MEM_LIMIT:-8g}"
  # Hardening: read-only root fs (writable /tmp via tmpfs), no capabilities,
  # no privilege escalation.
  read_only: true
  tmpfs:
    - /tmp:size=512m
  cap_drop: [ALL]
  security_opt: ["no-new-privileges:true"]
  healthcheck:
    test:
      - CMD
      - ollama
      - list
    start_period: 60s
    interval: 30s
    timeout: 10s
    retries: 5
backend:
image: familienarchiv/backend:${TAG:-nightly}
build:

View File

@@ -161,8 +161,11 @@ services:
- ALL
security_opt:
- no-new-privileges:true
command: >
sh -c "ollama serve & SERVE_PID=$$! && until curl -sf http://localhost:11434/api/tags; do sleep 1; done && ollama pull qwen2.5:7b-instruct-q4_K_M && kill $$SERVE_PID"
# The image ENTRYPOINT is `ollama`, so override it to a shell; the image has
# no curl, so readiness is probed with `ollama list` instead of a curl loop.
entrypoint: ["/bin/sh", "-c"]
command:
- "ollama serve & until ollama list >/dev/null 2>&1; do sleep 1; done && ollama pull qwen2.5:7b-instruct-q4_K_M"
# --- Ollama: LLM inference server ---
# Serves the pre-pulled model for NL search inference.
@@ -191,7 +194,9 @@ services:
security_opt:
- no-new-privileges:true
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
# `ollama list` hits the local API and exits non-zero if the server is
# down — used instead of curl, which the image does not ship.
test: ["CMD", "ollama", "list"]
interval: 30s
timeout: 10s
retries: 5