diff --git a/docker-compose.yml b/docker-compose.yml
index 74f1bd3e..a87cb84e 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -141,6 +141,79 @@ services:
     security_opt:
       - no-new-privileges:true
 
+  # --- Ollama: Model init (one-shot pull) ---
+  # Pulls qwen2.5:7b-instruct-q4_K_M (~4.7 GB) into the ollama_models volume on first start.
+  # On subsequent starts (model already in volume), exits quickly without re-downloading.
+  # Not started in CI — CI uses explicit service selection
+  # (docker-compose.ci.yml: db minio create-buckets)
+  ollama-model-init:
+    image: ollama/ollama:0.30.6
+    restart: "no"
+    networks:
+      - archiv-net
+    volumes:
+      - ollama_models:/root/.ollama
+    mem_limit: 2g
+    read_only: true
+    tmpfs:
+      - /tmp:size=512m
+    cap_drop:
+      - ALL
+    security_opt:
+      - no-new-privileges:true
+    # The image's default entrypoint is the ollama binary itself, so it must be
+    # replaced with a shell for the script below to run at all.
+    entrypoint: ["/bin/sh", "-c"]
+    # "$$" is Compose's escape for a literal "$"; a single "$" would be interpolated
+    # by Compose from the host environment before the shell ever sees it.
+    # Readiness is probed with the ollama CLI because the ollama binary is always
+    # present in the image, whereas curl may not be installed.
+    command:
+      - |
+        ollama serve &
+        SERVE_PID=$$!
+        until ollama list >/dev/null 2>&1; do sleep 1; done
+        ollama pull qwen2.5:7b-instruct-q4_K_M
+        PULL_STATUS=$$?
+        kill "$$SERVE_PID"
+        exit "$$PULL_STATUS"
+
+  # --- Ollama: LLM inference server ---
+  # Serves the pre-pulled model for NL search inference.
+  # Not started in CI — CI uses explicit service selection
+  # (docker-compose.ci.yml: db minio create-buckets)
+  ollama:
+    image: ollama/ollama:0.30.6
+    container_name: archive-ollama
+    restart: unless-stopped
+    expose:
+      - "11434"
+    networks:
+      - archiv-net
+    volumes:
+      - ollama_models:/root/.ollama
+    environment:
+      OLLAMA_API_KEY: "${OLLAMA_API_KEY}"
+    cpus: "${OLLAMA_CPU_LIMIT:-4.0}"
+    mem_limit: "${OLLAMA_MEM_LIMIT:-8g}"
+    memswap_limit: "${OLLAMA_MEM_LIMIT:-8g}"
+    read_only: true
+    tmpfs:
+      - /tmp:size=512m
+    cap_drop:
+      - ALL
+    security_opt:
+      - no-new-privileges:true
+    healthcheck:
+      # Uses the ollama CLI (queries the local server) instead of curl, which may not be in the image.
+      test: ["CMD", "ollama", "list"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 60s  # model weights are pre-pulled into the volume by ollama-model-init; service only needs to bind port
+    depends_on:
+      ollama-model-init:
+        condition: service_completed_successfully
+
   # --- Backend: Spring Boot ---
   backend:
     build:
@@ -184,6 +257,8 @@ services:
       SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-false}
       APP_OCR_BASE_URL: http://ocr-service:8000
       APP_OCR_TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"
+      APP_OLLAMA_BASE_URL: http://ollama:11434
+      APP_OLLAMA_API_KEY: "${OLLAMA_API_KEY}"
       SENTRY_DSN: ${SENTRY_DSN:-}
       SENTRY_TRACES_SAMPLE_RATE: ${SENTRY_TRACES_SAMPLE_RATE:-1.0}
       # Observability: send traces to Tempo inside archiv-net (OTLP gRPC port 4317)
@@ -247,3 +322,4 @@ volumes:
   frontend_node_modules:
   ocr_models:
   ocr_cache:
+  ollama_models: