feat(infra): replace Ollama with nlp-service in docker-compose
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -141,74 +141,41 @@ services:
|
|||||||
security_opt:
|
security_opt:
|
||||||
- no-new-privileges:true
|
- no-new-privileges:true
|
||||||
|
|
||||||
# --- Ollama: Model init (one-shot pull) ---
|
# --- NLP service: rule-based NL query parser ---
|
||||||
# Pulls qwen2.5:7b-instruct-q4_K_M (~4.7 GB) into the ollama_models volume on first start.
|
# FastAPI Python service; replaces Ollama for smart search query parsing.
|
||||||
# On subsequent starts (model already in volume), exits quickly without re-downloading.
|
|
||||||
# Not started in CI — CI uses explicit service selection
|
# Not started in CI — CI uses explicit service selection
|
||||||
# (docker-compose.ci.yml: db minio create-buckets)
|
# (docker-compose.ci.yml: db minio create-buckets)
|
||||||
ollama-model-init:
|
nlp-service:
|
||||||
image: ollama/ollama:0.30.6
|
build:
|
||||||
restart: "no"
|
context: ./nlp-service
|
||||||
networks:
|
dockerfile: Dockerfile
|
||||||
- archiv-net
|
container_name: archive-nlp
|
||||||
volumes:
|
|
||||||
- ollama_models:/root/.ollama
|
|
||||||
mem_limit: 2g
|
|
||||||
read_only: true
|
|
||||||
tmpfs:
|
|
||||||
- /tmp:size=512m
|
|
||||||
cap_drop:
|
|
||||||
- ALL
|
|
||||||
security_opt:
|
|
||||||
- no-new-privileges:true
|
|
||||||
# The image ENTRYPOINT is `ollama`, so override it to a shell; the image has
|
|
||||||
# no curl, so readiness is probed with `ollama list` instead of a curl loop.
|
|
||||||
# The pull is guarded by a `grep` on the cached model list so an already-cached
|
|
||||||
# model exits clean without a registry round-trip (offline-safe re-up).
|
|
||||||
entrypoint: ["/bin/sh", "-c"]
|
|
||||||
command:
|
|
||||||
- "ollama serve & until ollama list >/dev/null 2>&1; do sleep 1; done && (ollama list | grep -q 'qwen2.5:7b-instruct-q4_K_M' || ollama pull qwen2.5:7b-instruct-q4_K_M)"
|
|
||||||
|
|
||||||
# --- Ollama: LLM inference server ---
|
|
||||||
# Serves the pre-pulled model for NL search inference.
|
|
||||||
# Not started in CI — CI uses explicit service selection
|
|
||||||
# (docker-compose.ci.yml: db minio create-buckets)
|
|
||||||
ollama:
|
|
||||||
image: ollama/ollama:0.30.6
|
|
||||||
container_name: archive-ollama
|
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
expose:
|
expose:
|
||||||
- "11434"
|
- "8001"
|
||||||
networks:
|
networks:
|
||||||
- archiv-net
|
- archiv-net
|
||||||
volumes:
|
|
||||||
- ollama_models:/root/.ollama
|
|
||||||
environment:
|
environment:
|
||||||
OLLAMA_API_KEY: "${OLLAMA_API_KEY}"
|
DATABASE_URL: "postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB}"
|
||||||
# Pin the model in memory (no idle unload) so queries never pay a cold-load
|
NLP_FUZZY_THRESHOLD: "${NLP_FUZZY_THRESHOLD:-80}"
|
||||||
# penalty that exceeds the backend read timeout → NL search 503 after idle.
|
mem_limit: 256m
|
||||||
OLLAMA_KEEP_ALIVE: "-1"
|
memswap_limit: 256m
|
||||||
cpus: "${OLLAMA_CPU_LIMIT:-4.0}"
|
|
||||||
mem_limit: "${OLLAMA_MEM_LIMIT:-8g}"
|
|
||||||
memswap_limit: "${OLLAMA_MEM_LIMIT:-8g}"
|
|
||||||
read_only: true
|
read_only: true
|
||||||
tmpfs:
|
tmpfs:
|
||||||
- /tmp:size=512m
|
- /tmp:size=32m
|
||||||
cap_drop:
|
cap_drop:
|
||||||
- ALL
|
- ALL
|
||||||
security_opt:
|
security_opt:
|
||||||
- no-new-privileges:true
|
- no-new-privileges:true
|
||||||
healthcheck:
|
healthcheck:
|
||||||
# `ollama list` hits the local API and exits non-zero if the server is
|
test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
|
||||||
# down — used instead of curl, which the image does not ship.
|
interval: 10s
|
||||||
test: ["CMD", "ollama", "list"]
|
timeout: 5s
|
||||||
interval: 30s
|
|
||||||
timeout: 10s
|
|
||||||
retries: 5
|
retries: 5
|
||||||
start_period: 60s # model weights are pre-loaded by ollama-model-init; service only needs to bind port
|
start_period: 15s
|
||||||
depends_on:
|
depends_on:
|
||||||
ollama-model-init:
|
db:
|
||||||
condition: service_completed_successfully
|
condition: service_healthy
|
||||||
|
|
||||||
# --- Backend: Spring Boot ---
|
# --- Backend: Spring Boot ---
|
||||||
backend:
|
backend:
|
||||||
@@ -228,6 +195,8 @@ services:
|
|||||||
condition: service_started
|
condition: service_started
|
||||||
ocr-service:
|
ocr-service:
|
||||||
condition: service_started
|
condition: service_started
|
||||||
|
nlp-service:
|
||||||
|
condition: service_started
|
||||||
environment:
|
environment:
|
||||||
SPRING_DATASOURCE_URL: jdbc:postgresql://db:5432/${POSTGRES_DB}
|
SPRING_DATASOURCE_URL: jdbc:postgresql://db:5432/${POSTGRES_DB}
|
||||||
SPRING_DATASOURCE_USERNAME: ${POSTGRES_USER}
|
SPRING_DATASOURCE_USERNAME: ${POSTGRES_USER}
|
||||||
@@ -253,8 +222,7 @@ services:
|
|||||||
SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-false}
|
SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-false}
|
||||||
APP_OCR_BASE_URL: http://ocr-service:8000
|
APP_OCR_BASE_URL: http://ocr-service:8000
|
||||||
APP_OCR_TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"
|
APP_OCR_TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"
|
||||||
APP_OLLAMA_BASE_URL: "${APP_OLLAMA_BASE_URL:-http://ollama:11434}"
|
APP_NLP_BASE_URL: "http://nlp-service:8001"
|
||||||
APP_OLLAMA_API_KEY: "${OLLAMA_API_KEY}"
|
|
||||||
SENTRY_DSN: ${SENTRY_DSN:-}
|
SENTRY_DSN: ${SENTRY_DSN:-}
|
||||||
SENTRY_TRACES_SAMPLE_RATE: ${SENTRY_TRACES_SAMPLE_RATE:-1.0}
|
SENTRY_TRACES_SAMPLE_RATE: ${SENTRY_TRACES_SAMPLE_RATE:-1.0}
|
||||||
# Observability: send traces to Tempo inside archiv-net (OTLP gRPC port 4317)
|
# Observability: send traces to Tempo inside archiv-net (OTLP gRPC port 4317)
|
||||||
@@ -318,4 +286,3 @@ volumes:
|
|||||||
frontend_node_modules:
|
frontend_node_modules:
|
||||||
ocr_models:
|
ocr_models:
|
||||||
ocr_cache:
|
ocr_cache:
|
||||||
ollama_models:
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
FROM python:3.11-slim
|
FROM python:3.11.12-slim
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user