refactor(search): remove NLP/smart-search feature entirely #772

Merged
marcel merged 51 commits from worktree-feat+nlp-service into main 2026-06-08 10:57:01 +02:00
2 changed files with 0 additions and 39 deletions
Showing only changes of commit 75c9dc4be9 - Show all commits

View File

@@ -12,7 +12,6 @@ System_Boundary(archiv, "Familienarchiv (Docker Compose)") {
Container(frontend, "Web Frontend", "SvelteKit / Node adapter / port 3000", "Server-side rendered UI. Handles auth session cookies, document search and viewer, transcription editor, annotation layer, family tree (Stammbaum), stories (Geschichten), activity feed (Chronik), enrichment workflow, and admin panel.")
Container(backend, "API Backend", "Spring Boot 4 / Java 21 / Jetty / port 8080", "REST API. Implements document management, search, user auth, file upload/download, transcription, OCR orchestration, and SSE notifications. Trusts X-Forwarded-* headers from Caddy.")
Container(ocr, "OCR Service", "Python FastAPI / port 8000", "Handwritten text recognition (HTR) and OCR microservice. Single-node by design — see ADR-001. Reachable only on the internal Docker network; no external port exposed.")
Container(nlp, "NLP Service", "Python FastAPI / port 8001 (internal only)", "Rule-based NL search query parser. Extracts person names (fuzzy DB lookup), date ranges (regex), person role, and keywords. Reachable only on the internal Docker network; no external port exposed.")
ContainerDb(db, "Relational Database", "PostgreSQL 16", "Stores document metadata, persons, users, permission groups, tags, transcription blocks, audit log, and Spring Session data.")
ContainerDb(storage, "Object Storage", "MinIO (S3-compatible)", "Stores the actual document files (PDFs, scans). Backend uses a bucket-scoped service account (archiv-app), not MinIO root.")
Container(mc, "Bucket / Service-Account Init", "MinIO Client (mc)", "One-shot container on startup. Idempotent: creates the archive bucket, the archiv-app service account, and attaches the readwrite policy.")
@@ -42,7 +41,6 @@ Rel(backend, ocr, "OCR job requests with presigned MinIO URL", "HTTP / REST / JS
Rel(backend, mail, "Sends notification and password-reset emails (optional)", "SMTP")
Rel(ocr, storage, "Fetches PDF via presigned URL", "HTTP / S3 presigned")
Rel(mc, storage, "Bootstraps bucket + service account on startup", "MinIO Client CLI")
Rel(backend, nlp, "NL query parsing (POST /parse)", "HTTP / REST / JSON")
Rel(promtail, loki, "Pushes log streams", "HTTP/Loki push API")
Rel(backend, tempo, "Sends distributed traces via OTLP", "HTTP / OTLP / port 4318 (archiv-net)")
Rel(prometheus, backend, "Scrapes JVM + HTTP metrics", "HTTP 8081 /actuator/prometheus")

View File

@@ -1,37 +0,0 @@
@startuml
!include <C4/C4_Component>
title Component Diagram: API Backend — NL Search
Container(frontend, "Web Frontend", "SvelteKit")
ContainerDb(db, "PostgreSQL", "PostgreSQL 16")
Container(ollama, "Ollama", "ollama/ollama — port 11434 (internal only)")
System_Boundary(backend, "API Backend (Spring Boot)") {
Component(nlCtrl, "NlSearchController", "Spring MVC — POST /api/search/nl", "REST entry point for natural language search. Enforces READ_ALL permission. Uses @AuthenticationPrincipal UserDetails to obtain the caller's email for rate limiting. Delegates to NlQueryParserService and returns NlSearchResponse.")
Component(rateLimiter, "NlSearchRateLimiter", "Spring Service", "Bucket4j + Caffeine LoadingCache keyed on user email. Allows 5 NL search requests per minute per user. Throws DomainException(SMART_SEARCH_RATE_LIMITED / HTTP 429) when the bucket is exhausted. Node-local — same caveat as LoginRateLimiter.")
Component(parserSvc, "NlQueryParserService", "Spring Service", "Orchestrates the full NL search pipeline: (1) validates query length, (2) calls OllamaClient.parse() to extract structured intent, (3) resolves keywords to tags via TagService.findByNameContaining(), (4) resolves each person name via PersonService.findByDisplayNameContaining(), (5) applies multi-name / personRole heuristics, (6) delegates to DocumentService.searchDocuments() or searchDocumentsByPersonId(). Returns NlSearchResponse. Never logs raw query content (PII).")
Component(ollamaClient, "RestClientOllamaClient", "Spring Service — implements OllamaClient + OllamaHealthClient", "HTTP client for the Ollama API. Uses two separate RestClient instances: inference client (30 s read timeout) and health-check client (2 s connect timeout). Calls POST /api/generate with grammar-constrained JSON schema (personNames, personRole, dateFrom, dateTo, keywords). isHealthy() polls GET /api/tags. Null-coalesces absent personNames/keywords to List.of(). Defaults unknown personRole to 'any' with a warning log. Maps timeout/5xx/parse errors to DomainException(SMART_SEARCH_UNAVAILABLE / HTTP 503).")
Component(ollamaProps, "OllamaProperties", "@ConfigurationProperties(\"app.ollama\")", "Config bean: baseUrl, model (qwen2.5:7b-instruct-q4_K_M), timeoutSeconds (default: 30), healthCheckTimeoutSeconds (default: 2).")
Component(rateLimitProps, "NlSearchRateLimitProperties", "@ConfigurationProperties(\"app.nl-search.rate-limit\")", "Config bean: maxRequestsPerMinute (default: 5).")
}
Component(personSvc, "PersonService", "Spring Service", "See diagram 3e. findByDisplayNameContaining(fragment) delegates to PersonRepository.searchByName() — covers first+last name, alias, and name aliases via LEFT JOIN.")
Component(documentSvc, "DocumentService", "Spring Service", "See diagram 3b. searchDocuments() for keyword/sender/receiver/date queries. searchDocumentsByPersonId() for OR-semantics single-person queries (person as sender OR receiver, no keyword filter).")
Component(tagSvc, "TagService", "Spring Service", "See diagram 3b. findByNameContaining(fragment) delegates to TagRepository.findByNameContainingIgnoreCase(). resolveEffectiveColors() applies one-level color inheritance in-place on a collection of Tag entities.")
Rel(frontend, nlCtrl, "POST /api/search/nl with JSON query", "HTTP / JSON")
Rel(nlCtrl, rateLimiter, "checkAndConsume(userEmail)")
Rel(nlCtrl, parserSvc, "parse(query)")
Rel(parserSvc, ollamaClient, "parse(rawQuery) — extracts intent", "HTTP / JSON")
Rel(ollamaClient, ollama, "POST /api/generate (grammar-constrained JSON schema)", "HTTP / REST")
Rel(ollamaClient, ollama, "GET /api/tags (health check)", "HTTP / REST")
Rel(parserSvc, tagSvc, "findByNameContaining(keyword) — keyword→tag resolution")
Rel(parserSvc, tagSvc, "resolveEffectiveColors(tags)")
Rel(parserSvc, personSvc, "findByDisplayNameContaining(name) for each extracted name")
Rel(parserSvc, documentSvc, "searchDocuments() or searchDocumentsByPersonId()")
Rel(documentSvc, db, "JPA queries", "JDBC")
Rel(personSvc, db, "JPA queries", "JDBC")
Rel(tagSvc, db, "JPA queries", "JDBC")
@enduml