fix(infra): replace Ollama with nlp-service in docker-compose.prod.yml
Some checks failed
CI / OCR Service Tests (pull_request) Has been cancelled
CI / Backend Unit Tests (pull_request) Has been cancelled
CI / fail2ban Regex (pull_request) Has been cancelled
CI / Semgrep Security Scan (pull_request) Has been cancelled
CI / Compose Bucket Idempotency (pull_request) Has been cancelled
CI / Unit & Component Tests (pull_request) Has been cancelled
Some checks failed
CI / OCR Service Tests (pull_request) Has been cancelled
CI / Backend Unit Tests (pull_request) Has been cancelled
CI / fail2ban Regex (pull_request) Has been cancelled
CI / Semgrep Security Scan (pull_request) Has been cancelled
CI / Compose Bucket Idempotency (pull_request) Has been cancelled
CI / Unit & Component Tests (pull_request) Has been cancelled
Removes the ollama and ollama-model-init services (and the ollama-models volume) from the production/staging compose file.

Adds the nlp-service in their place — mirroring the dev compose — and wires the backend dependency and the APP_NLP_BASE_URL env var so staging can reach the new service.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -50,7 +50,6 @@ volumes:
|
|||||||
minio-data:
|
minio-data:
|
||||||
ocr-models:
|
ocr-models:
|
||||||
ocr-cache:
|
ocr-cache:
|
||||||
ollama-models:
|
|
||||||
|
|
||||||
services:
|
services:
|
||||||
db:
|
db:
|
||||||
@@ -201,72 +200,38 @@ services:
|
|||||||
security_opt:
|
security_opt:
|
||||||
- no-new-privileges:true
|
- no-new-privileges:true
|
||||||
|
|
||||||
# --- Ollama: Model init (one-shot pull) ---
|
# --- NLP service: rule-based NL query parser ---
|
||||||
# Pulls qwen2.5:7b-instruct-q4_K_M (~4.7 GB) into the ollama-models volume on
|
# Lightweight FastAPI service; replaces Ollama for smart search query parsing.
|
||||||
# first start; exits quickly on subsequent starts (model already cached).
|
# Connects to the DB at startup to build person/tag lookup tables.
|
||||||
# The ollama/ollama image's ENTRYPOINT is `ollama` and the image ships WITHOUT
|
nlp-service:
|
||||||
# curl, so the entrypoint is overridden to a shell and readiness is probed with
|
build:
|
||||||
# `ollama list` (not curl). The pull is guarded by a `grep` on the cached model
|
context: ./nlp-service
|
||||||
# list so a model already on the volume exits clean WITHOUT a registry round-trip
|
|
||||||
# — a host reboot during a registry/network blip can no longer fail init (which
|
|
||||||
# would block the ollama service via service_completed_successfully).
|
|
||||||
# Backend degrades gracefully (503) if Ollama is absent.
|
|
||||||
ollama-model-init:
|
|
||||||
image: ollama/ollama:0.30.6
|
|
||||||
restart: "no"
|
|
||||||
entrypoint: ["/bin/sh", "-c"]
|
|
||||||
command:
|
|
||||||
- "ollama serve & until ollama list >/dev/null 2>&1; do sleep 1; done && (ollama list | grep -q 'qwen2.5:7b-instruct-q4_K_M' || ollama pull qwen2.5:7b-instruct-q4_K_M)"
|
|
||||||
networks:
|
|
||||||
- archiv-net
|
|
||||||
volumes:
|
|
||||||
- ollama-models:/root/.ollama
|
|
||||||
mem_limit: 2g
|
|
||||||
read_only: true
|
|
||||||
tmpfs:
|
|
||||||
- /tmp:size=512m
|
|
||||||
cap_drop:
|
|
||||||
- ALL
|
|
||||||
security_opt:
|
|
||||||
- no-new-privileges:true
|
|
||||||
|
|
||||||
# --- Ollama: LLM inference server ---
|
|
||||||
# Serves the pre-pulled model for NL search inference. Backend reaches it at
|
|
||||||
# http://ollama:11434 (application.yaml default; no env override required).
|
|
||||||
# Healthcheck uses `ollama list` because the image has no curl.
|
|
||||||
ollama:
|
|
||||||
image: ollama/ollama:0.30.6
|
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
expose:
|
expose:
|
||||||
- "11434"
|
- "8001"
|
||||||
networks:
|
networks:
|
||||||
- archiv-net
|
- archiv-net
|
||||||
volumes:
|
|
||||||
- ollama-models:/root/.ollama
|
|
||||||
environment:
|
environment:
|
||||||
# Pin the model in memory (no idle unload). Without this, Ollama evicts
|
DATABASE_URL: "postgresql://archiv:${POSTGRES_PASSWORD}@db:5432/archiv"
|
||||||
# the model after ~5 min idle and the next query pays a cold-load penalty
|
NLP_FUZZY_THRESHOLD: "${NLP_FUZZY_THRESHOLD:-80}"
|
||||||
# that exceeds the backend read timeout → NL search 503 after idle.
|
mem_limit: 256m
|
||||||
OLLAMA_KEEP_ALIVE: "-1"
|
memswap_limit: 256m
|
||||||
cpus: "${OLLAMA_CPU_LIMIT:-4.0}"
|
|
||||||
mem_limit: "${OLLAMA_MEM_LIMIT:-8g}"
|
|
||||||
memswap_limit: "${OLLAMA_MEM_LIMIT:-8g}"
|
|
||||||
read_only: true
|
read_only: true
|
||||||
tmpfs:
|
tmpfs:
|
||||||
- /tmp:size=512m
|
- /tmp:size=32m
|
||||||
cap_drop:
|
cap_drop:
|
||||||
- ALL
|
- ALL
|
||||||
security_opt:
|
security_opt:
|
||||||
- no-new-privileges:true
|
- no-new-privileges:true
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "ollama", "list"]
|
test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
|
||||||
interval: 30s
|
interval: 10s
|
||||||
timeout: 10s
|
timeout: 5s
|
||||||
retries: 5
|
retries: 5
|
||||||
start_period: 60s
|
start_period: 15s
|
||||||
depends_on:
|
depends_on:
|
||||||
ollama-model-init:
|
db:
|
||||||
condition: service_completed_successfully
|
condition: service_healthy
|
||||||
|
|
||||||
backend:
|
backend:
|
||||||
image: familienarchiv/backend:${TAG:-nightly}
|
image: familienarchiv/backend:${TAG:-nightly}
|
||||||
@@ -286,6 +251,8 @@ services:
|
|||||||
# is a one-shot that must complete successfully. See #510.
|
# is a one-shot that must complete successfully. See #510.
|
||||||
create-buckets:
|
create-buckets:
|
||||||
condition: service_completed_successfully
|
condition: service_completed_successfully
|
||||||
|
nlp-service:
|
||||||
|
condition: service_healthy
|
||||||
# Bound to localhost only — Caddy fronts external traffic.
|
# Bound to localhost only — Caddy fronts external traffic.
|
||||||
ports:
|
ports:
|
||||||
- "127.0.0.1:${PORT_BACKEND}:8080"
|
- "127.0.0.1:${PORT_BACKEND}:8080"
|
||||||
@@ -320,6 +287,7 @@ services:
|
|||||||
APP_ADMIN_PASSWORD: ${APP_ADMIN_PASSWORD}
|
APP_ADMIN_PASSWORD: ${APP_ADMIN_PASSWORD}
|
||||||
APP_OCR_BASE_URL: http://ocr-service:8000
|
APP_OCR_BASE_URL: http://ocr-service:8000
|
||||||
APP_OCR_TRAINING_TOKEN: ${OCR_TRAINING_TOKEN}
|
APP_OCR_TRAINING_TOKEN: ${OCR_TRAINING_TOKEN}
|
||||||
|
APP_NLP_BASE_URL: http://nlp-service:8001
|
||||||
MAIL_HOST: ${MAIL_HOST}
|
MAIL_HOST: ${MAIL_HOST}
|
||||||
MAIL_PORT: ${MAIL_PORT:-587}
|
MAIL_PORT: ${MAIL_PORT:-587}
|
||||||
MAIL_USERNAME: ${MAIL_USERNAME:-}
|
MAIL_USERNAME: ${MAIL_USERNAME:-}
|
||||||
|
|||||||
Reference in New Issue
Block a user