feat(infra): add Ollama Docker Compose services for NL search (#737)

- ollama-model-init: one-shot init container that pulls qwen2.5:7b-instruct-q4_K_M into the ollama_models volume on first start
- ollama: main inference service on archiv-net (expose: only, no public port)
- ollama_models: named volume for persistent model storage
- APP_OLLAMA_BASE_URL + APP_OLLAMA_API_KEY added to backend env
- Both services: cap_drop ALL, no-new-privileges, read_only + tmpfs (ADR-019 + ADR-028)
- start_period: 60s — model pre-pulled by init container

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-06 14:01:41 +02:00
parent df10a42069
commit 9637ebbca2
1 changed files with 62 additions and 0 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -141,6 +141,65 @@ services:
    security_opt:
      - no-new-privileges:true

+  # --- Ollama: Model init (one-shot pull) ---
+  # Pulls qwen2.5:7b-instruct-q4_K_M (~4.7 GB) into the ollama_models volume on first start;
+  # later starts find the model in the volume and exit without re-downloading.
+  # Not started in CI (docker-compose.ci.yml selects services explicitly: db minio create-buckets).
+  # Script: `$$` escapes Compose interpolation; `ollama list` polls readiness without needing curl.
+  ollama-model-init:
+    image: ollama/ollama:0.30.6
+    restart: "no"
+    networks:
+      - archiv-net
+    volumes:
+      - ollama_models:/root/.ollama
+    mem_limit: 2g
+    read_only: true
+    tmpfs:
+      - /tmp:size=512m
+    cap_drop:
+      - ALL
+    security_opt:
+      - no-new-privileges:true
+    entrypoint: ["/bin/sh", "-c"]  # image default entrypoint is the ollama binary, not a shell
+    command: ['ollama serve & SERVE_PID=$$!; until ollama list >/dev/null 2>&1; do sleep 1; done; ollama pull qwen2.5:7b-instruct-q4_K_M; RC=$$?; kill $$SERVE_PID; exit $$RC']
+
+  # --- Ollama: LLM inference server ---
+  # Serves the model pulled by ollama-model-init for NL search inference (shared ollama_models volume).
+  # Not started in CI — CI uses explicit service selection
+  # (docker-compose.ci.yml: db minio create-buckets)
+  ollama:
+    image: ollama/ollama:0.30.6
+    container_name: archive-ollama
+    restart: unless-stopped
+    expose:
+      - "11434"
+    networks:
+      - archiv-net
+    volumes:
+      - ollama_models:/root/.ollama
+    environment:
+      OLLAMA_API_KEY: "${OLLAMA_API_KEY}"
+    cpus: "${OLLAMA_CPU_LIMIT:-4.0}"
+    mem_limit: "${OLLAMA_MEM_LIMIT:-8g}"
+    memswap_limit: "${OLLAMA_MEM_LIMIT:-8g}"  # same value as mem_limit, so no extra swap is granted
+    read_only: true
+    tmpfs:
+      - /tmp:size=512m
+    cap_drop:
+      - ALL
+    security_opt:
+      - no-new-privileges:true
+    healthcheck:
+      test: ["CMD", "ollama", "list"]  # CLI probe of the local API; does not rely on curl being in the image
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 60s  # model is already in the volume (pulled by ollama-model-init); service only needs to bind port
+    depends_on:
+      ollama-model-init:
+        condition: service_completed_successfully
+
  # --- Backend: Spring Boot ---
  backend:
    build:
@@ -184,6 +243,8 @@ services:
      SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-false}
      APP_OCR_BASE_URL: http://ocr-service:8000
      APP_OCR_TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"
+      APP_OLLAMA_BASE_URL: http://ollama:11434
+      APP_OLLAMA_API_KEY: "${OLLAMA_API_KEY}"
      SENTRY_DSN: ${SENTRY_DSN:-}
      SENTRY_TRACES_SAMPLE_RATE: ${SENTRY_TRACES_SAMPLE_RATE:-1.0}
      # Observability: send traces to Tempo inside archiv-net (OTLP gRPC port 4317)
@@ -247,3 +308,4 @@ volumes:
  frontend_node_modules:
  ocr_models:
  ocr_cache:
+  ollama_models: