feat(infra): add Ollama Docker Compose services for NL search (#737)
- ollama-model-init: one-shot init container that pulls qwen2.5:7b-instruct-q4_K_M into the ollama_models volume on first start
- ollama: main inference service on archiv-net (expose: only, no public port)
- ollama_models named volume for persistent model storage
- APP_OLLAMA_BASE_URL + APP_OLLAMA_API_KEY added to backend env
- Both services: cap_drop ALL, no-new-privileges, read_only+tmpfs (ADR-019 + ADR-028)
- start_period: 60s — model pre-pulled by init container

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -141,6 +141,65 @@ services:
|
|||||||
security_opt:
|
security_opt:
|
||||||
- no-new-privileges:true
|
- no-new-privileges:true
|
||||||
|
|
||||||
|
# --- Ollama: Model init (one-shot pull) ---
# Pulls qwen2.5:7b-instruct-q4_K_M (~4.7 GB) into the ollama_models volume on first start.
# On subsequent starts (model already in volume), exits quickly without re-downloading.
# Not started in CI — CI uses explicit service selection
# (docker-compose.ci.yml: db minio create-buckets)
ollama-model-init:
  image: ollama/ollama:0.30.6
  restart: "no"
  networks:
    - archiv-net
  volumes:
    - ollama_models:/root/.ollama
  mem_limit: 2g
  read_only: true
  tmpfs:
    - /tmp:size=512m
  cap_drop:
    - ALL
  security_opt:
    - no-new-privileges:true
  # The official image's ENTRYPOINT is the ollama binary, so a bare
  # `command: sh -c ...` would be executed as `ollama sh -c ...`.
  # Override the entrypoint to get a real shell for the script below.
  entrypoint: ["/bin/sh", "-c"]
  # Script notes:
  #   * `$$` is Compose's escape for a literal `$`; without it Compose tries to
  #     interpolate `$!` / `$SERVE_PID` itself before the shell ever runs.
  #   * Readiness is probed with `ollama list` because the image ships no curl.
  #   * The loop aborts if the background server dies, instead of spinning forever.
  #   * The pull's exit status is propagated so that dependants waiting on
  #     `service_completed_successfully` only start after a successful pull.
  command:
    - |
      ollama serve &
      SERVE_PID=$$!
      until ollama list >/dev/null 2>&1; do
        kill -0 $$SERVE_PID 2>/dev/null || exit 1
        sleep 1
      done
      ollama pull qwen2.5:7b-instruct-q4_K_M
      STATUS=$$?
      kill $$SERVE_PID
      exit $$STATUS
|
||||||
|
|
||||||
|
# --- Ollama: LLM inference server ---
# Serves the pre-pulled model for NL search inference.
# Reachable only from other containers on archiv-net (expose, no published port).
# Not started in CI — CI uses explicit service selection
# (docker-compose.ci.yml: db minio create-buckets)
ollama:
  image: ollama/ollama:0.30.6
  container_name: archive-ollama
  restart: unless-stopped
  expose:
    - "11434"
  networks:
    - archiv-net
  volumes:
    - ollama_models:/root/.ollama
  environment:
    OLLAMA_API_KEY: "${OLLAMA_API_KEY}"
  cpus: "${OLLAMA_CPU_LIMIT:-4.0}"
  mem_limit: "${OLLAMA_MEM_LIMIT:-8g}"
  # Same value as mem_limit, i.e. no additional swap for this container.
  memswap_limit: "${OLLAMA_MEM_LIMIT:-8g}"
  read_only: true
  tmpfs:
    - /tmp:size=512m
  cap_drop:
    - ALL
  security_opt:
    - no-new-privileges:true
  healthcheck:
    # The ollama/ollama image does not ship curl, so probe the API with the
    # bundled CLI instead; `ollama list` fails until the server answers.
    test: ["CMD", "ollama", "list"]
    interval: 30s
    timeout: 10s
    retries: 5
    # The model is already pulled into the volume by ollama-model-init,
    # so the server only needs time to start and bind its port.
    start_period: 60s
  depends_on:
    ollama-model-init:
      condition: service_completed_successfully
|
||||||
|
|
||||||
# --- Backend: Spring Boot ---
|
# --- Backend: Spring Boot ---
|
||||||
backend:
|
backend:
|
||||||
build:
|
build:
|
||||||
@@ -184,6 +243,8 @@ services:
|
|||||||
SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-false}
|
SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-false}
|
||||||
APP_OCR_BASE_URL: http://ocr-service:8000
|
APP_OCR_BASE_URL: http://ocr-service:8000
|
||||||
APP_OCR_TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"
|
APP_OCR_TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"
|
||||||
|
APP_OLLAMA_BASE_URL: http://ollama:11434
|
||||||
|
APP_OLLAMA_API_KEY: "${OLLAMA_API_KEY}"
|
||||||
SENTRY_DSN: ${SENTRY_DSN:-}
|
SENTRY_DSN: ${SENTRY_DSN:-}
|
||||||
SENTRY_TRACES_SAMPLE_RATE: ${SENTRY_TRACES_SAMPLE_RATE:-1.0}
|
SENTRY_TRACES_SAMPLE_RATE: ${SENTRY_TRACES_SAMPLE_RATE:-1.0}
|
||||||
# Observability: send traces to Tempo inside archiv-net (OTLP gRPC port 4317)
|
# Observability: send traces to Tempo inside archiv-net (OTLP gRPC port 4317)
|
||||||
@@ -247,3 +308,4 @@ volumes:
|
|||||||
frontend_node_modules:
|
frontend_node_modules:
|
||||||
ocr_models:
|
ocr_models:
|
||||||
ocr_cache:
|
ocr_cache:
|
||||||
|
ollama_models:
|
||||||
|
|||||||
Reference in New Issue
Block a user