feat(infra): replace Ollama with nlp-service in docker-compose

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-06-07 16:12:13 +02:00
committed by marcel
parent ab10daf325
commit 34ff3dbdfd
2 changed files with 23 additions and 56 deletions

View File

@@ -141,74 +141,41 @@ services:
security_opt: security_opt:
- no-new-privileges:true - no-new-privileges:true
# --- Ollama: Model init (one-shot pull) --- # --- NLP service: rule-based NL query parser ---
# Pulls qwen2.5:7b-instruct-q4_K_M (~4.7 GB) into the ollama_models volume on first start. # FastAPI Python service; replaces Ollama for smart search query parsing.
# On subsequent starts (model already in volume), exits quickly without re-downloading.
# Not started in CI — CI uses explicit service selection # Not started in CI — CI uses explicit service selection
# (docker-compose.ci.yml: db minio create-buckets) # (docker-compose.ci.yml: db minio create-buckets)
ollama-model-init: nlp-service:
image: ollama/ollama:0.30.6 build:
restart: "no" context: ./nlp-service
networks: dockerfile: Dockerfile
- archiv-net container_name: archive-nlp
volumes:
- ollama_models:/root/.ollama
mem_limit: 2g
read_only: true
tmpfs:
- /tmp:size=512m
cap_drop:
- ALL
security_opt:
- no-new-privileges:true
# The image ENTRYPOINT is `ollama`, so override it to a shell; the image has
# no curl, so readiness is probed with `ollama list` instead of a curl loop.
# The pull is guarded by a `grep` on the cached model list so an already-cached
# model exits clean without a registry round-trip (offline-safe re-up).
entrypoint: ["/bin/sh", "-c"]
command:
- "ollama serve & until ollama list >/dev/null 2>&1; do sleep 1; done && (ollama list | grep -q 'qwen2.5:7b-instruct-q4_K_M' || ollama pull qwen2.5:7b-instruct-q4_K_M)"
# --- Ollama: LLM inference server ---
# Serves the pre-pulled model for NL search inference.
# Not started in CI — CI uses explicit service selection
# (docker-compose.ci.yml: db minio create-buckets)
ollama:
image: ollama/ollama:0.30.6
container_name: archive-ollama
restart: unless-stopped restart: unless-stopped
expose: expose:
- "11434" - "8001"
networks: networks:
- archiv-net - archiv-net
volumes:
- ollama_models:/root/.ollama
environment: environment:
OLLAMA_API_KEY: "${OLLAMA_API_KEY}" DATABASE_URL: "postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB}"
# Pin the model in memory (no idle unload) so queries never pay a cold-load NLP_FUZZY_THRESHOLD: "${NLP_FUZZY_THRESHOLD:-80}"
# penalty that exceeds the backend read timeout → NL search 503 after idle. mem_limit: 256m
OLLAMA_KEEP_ALIVE: "-1" memswap_limit: 256m
cpus: "${OLLAMA_CPU_LIMIT:-4.0}"
mem_limit: "${OLLAMA_MEM_LIMIT:-8g}"
memswap_limit: "${OLLAMA_MEM_LIMIT:-8g}"
read_only: true read_only: true
tmpfs: tmpfs:
- /tmp:size=512m - /tmp:size=32m
cap_drop: cap_drop:
- ALL - ALL
security_opt: security_opt:
- no-new-privileges:true - no-new-privileges:true
healthcheck: healthcheck:
# `ollama list` hits the local API and exits non-zero if the server is test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
# down — used instead of curl, which the image does not ship. interval: 10s
test: ["CMD", "ollama", "list"] timeout: 5s
interval: 30s
timeout: 10s
retries: 5 retries: 5
start_period: 60s # model weights are pre-loaded by ollama-model-init; service only needs to bind port start_period: 15s
depends_on: depends_on:
ollama-model-init: db:
condition: service_completed_successfully condition: service_healthy
# --- Backend: Spring Boot --- # --- Backend: Spring Boot ---
backend: backend:
@@ -228,6 +195,8 @@ services:
condition: service_started condition: service_started
ocr-service: ocr-service:
condition: service_started condition: service_started
nlp-service:
condition: service_started
environment: environment:
SPRING_DATASOURCE_URL: jdbc:postgresql://db:5432/${POSTGRES_DB} SPRING_DATASOURCE_URL: jdbc:postgresql://db:5432/${POSTGRES_DB}
SPRING_DATASOURCE_USERNAME: ${POSTGRES_USER} SPRING_DATASOURCE_USERNAME: ${POSTGRES_USER}
@@ -253,8 +222,7 @@ services:
SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-false} SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-false}
APP_OCR_BASE_URL: http://ocr-service:8000 APP_OCR_BASE_URL: http://ocr-service:8000
APP_OCR_TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}" APP_OCR_TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"
APP_OLLAMA_BASE_URL: "${APP_OLLAMA_BASE_URL:-http://ollama:11434}" APP_NLP_BASE_URL: "http://nlp-service:8001"
APP_OLLAMA_API_KEY: "${OLLAMA_API_KEY}"
SENTRY_DSN: ${SENTRY_DSN:-} SENTRY_DSN: ${SENTRY_DSN:-}
SENTRY_TRACES_SAMPLE_RATE: ${SENTRY_TRACES_SAMPLE_RATE:-1.0} SENTRY_TRACES_SAMPLE_RATE: ${SENTRY_TRACES_SAMPLE_RATE:-1.0}
# Observability: send traces to Tempo inside archiv-net (OTLP gRPC port 4317) # Observability: send traces to Tempo inside archiv-net (OTLP gRPC port 4317)
@@ -318,4 +286,3 @@ volumes:
frontend_node_modules: frontend_node_modules:
ocr_models: ocr_models:
ocr_cache: ocr_cache:
ollama_models:

View File

@@ -1,4 +1,4 @@
FROM python:3.11-slim FROM python:3.11.12-slim
WORKDIR /app WORKDIR /app