diff --git a/docker-compose.yml b/docker-compose.yml
index f9e618ea..eb0a75ce 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -141,74 +141,43 @@ services:
     security_opt:
       - no-new-privileges:true
 
-  # --- Ollama: Model init (one-shot pull) ---
-  # Pulls qwen2.5:7b-instruct-q4_K_M (~4.7 GB) into the ollama_models volume on first start.
-  # On subsequent starts (model already in volume), exits quickly without re-downloading.
+  # --- NLP service: rule-based NL query parser ---
+  # FastAPI Python service; replaces Ollama for smart search query parsing.
   # Not started in CI — CI uses explicit service selection
   # (docker-compose.ci.yml: db minio create-buckets)
-  ollama-model-init:
-    image: ollama/ollama:0.30.6
-    restart: "no"
-    networks:
-      - archiv-net
-    volumes:
-      - ollama_models:/root/.ollama
-    mem_limit: 2g
-    read_only: true
-    tmpfs:
-      - /tmp:size=512m
-    cap_drop:
-      - ALL
-    security_opt:
-      - no-new-privileges:true
-    # The image ENTRYPOINT is `ollama`, so override it to a shell; the image has
-    # no curl, so readiness is probed with `ollama list` instead of a curl loop.
-    # The pull is guarded by a `grep` on the cached model list so an already-cached
-    # model exits clean without a registry round-trip (offline-safe re-up).
-    entrypoint: ["/bin/sh", "-c"]
-    command:
-      - "ollama serve & until ollama list >/dev/null 2>&1; do sleep 1; done && (ollama list | grep -q 'qwen2.5:7b-instruct-q4_K_M' || ollama pull qwen2.5:7b-instruct-q4_K_M)"
-
-  # --- Ollama: LLM inference server ---
-  # Serves the pre-pulled model for NL search inference.
-  # Not started in CI — CI uses explicit service selection
-  # (docker-compose.ci.yml: db minio create-buckets)
-  ollama:
-    image: ollama/ollama:0.30.6
-    container_name: archive-ollama
+  nlp-service:
+    build:
+      context: ./nlp-service
+      dockerfile: Dockerfile
+    container_name: archive-nlp
     restart: unless-stopped
     expose:
-      - "11434"
+      - "8001"
     networks:
       - archiv-net
-    volumes:
-      - ollama_models:/root/.ollama
     environment:
-      OLLAMA_API_KEY: "${OLLAMA_API_KEY}"
-      # Pin the model in memory (no idle unload) so queries never pay a cold-load
-      # penalty that exceeds the backend read timeout → NL search 503 after idle.
-      OLLAMA_KEEP_ALIVE: "-1"
-    cpus: "${OLLAMA_CPU_LIMIT:-4.0}"
-    mem_limit: "${OLLAMA_MEM_LIMIT:-8g}"
-    memswap_limit: "${OLLAMA_MEM_LIMIT:-8g}"
+      DATABASE_URL: "postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB}"
+      NLP_FUZZY_THRESHOLD: "${NLP_FUZZY_THRESHOLD:-80}"
+    mem_limit: 256m
+    memswap_limit: 256m
     read_only: true
     tmpfs:
-      - /tmp:size=512m
+      - /tmp:size=32m
     cap_drop:
       - ALL
     security_opt:
       - no-new-privileges:true
     healthcheck:
-      # `ollama list` hits the local API and exits non-zero if the server is
-      # down — used instead of curl, which the image does not ship.
-      test: ["CMD", "ollama", "list"]
-      interval: 30s
-      timeout: 10s
+      # Probe with the image's Python interpreter rather than curl, which python:*-slim
+      # base images do not ship by default; urlopen raises on connect errors and HTTP >= 400.
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8001/health', timeout=4)"]
+      interval: 10s
+      timeout: 5s
       retries: 5
-      start_period: 60s  # model weights are pre-loaded by ollama-model-init; service only needs to bind port
+      start_period: 15s
     depends_on:
-      ollama-model-init:
-        condition: service_completed_successfully
+      db:
+        condition: service_healthy
 
   # --- Backend: Spring Boot ---
   backend:
@@ -228,6 +197,8 @@ services:
         condition: service_started
       ocr-service:
         condition: service_started
+      nlp-service:
+        condition: service_started
     environment:
       SPRING_DATASOURCE_URL: jdbc:postgresql://db:5432/${POSTGRES_DB}
       SPRING_DATASOURCE_USERNAME: ${POSTGRES_USER}
@@ -253,8 +224,7 @@ services:
       SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-false}
       APP_OCR_BASE_URL: http://ocr-service:8000
       APP_OCR_TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"
-      APP_OLLAMA_BASE_URL: "${APP_OLLAMA_BASE_URL:-http://ollama:11434}"
-      APP_OLLAMA_API_KEY: "${OLLAMA_API_KEY}"
+      APP_NLP_BASE_URL: "http://nlp-service:8001"
       SENTRY_DSN: ${SENTRY_DSN:-}
       SENTRY_TRACES_SAMPLE_RATE: ${SENTRY_TRACES_SAMPLE_RATE:-1.0}
       # Observability: send traces to Tempo inside archiv-net (OTLP gRPC port 4317)
@@ -318,4 +288,3 @@ volumes:
   frontend_node_modules:
   ocr_models:
   ocr_cache:
-  ollama_models:
diff --git a/nlp-service/Dockerfile b/nlp-service/Dockerfile
index 61c723b0..ccb9e7e6 100644
--- a/nlp-service/Dockerfile
+++ b/nlp-service/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.11-slim
+FROM python:3.11.12-slim
 
 WORKDIR /app