Files
familienarchiv/nlp-service/main.py
2026-06-08 10:56:32 +02:00

80 lines
2.5 KiB
Python

"""FastAPI app — /parse and /health endpoints."""
from __future__ import annotations
import logging
import os
from contextlib import asynccontextmanager
from fastapi import FastAPI, HTTPException
logger = logging.getLogger(__name__)
from extractor import extract, get_person_matcher, set_fuzzy_threshold, set_person_matcher
from models import ParseRequest, ParseResponse
from person_matcher import PersonMatcher
# Fallback fuzzy-match threshold, used when NLP_FUZZY_THRESHOLD is absent from the environment.
_DEFAULT_FUZZY_THRESHOLD = 80
def _parse_fuzzy_threshold(val: str) -> int:
"""Parse and validate NLP_FUZZY_THRESHOLD — must be integer in [0, 100]."""
try:
n = int(val)
except ValueError:
raise ValueError(f"NLP_FUZZY_THRESHOLD must be an integer, got: {val!r}")
if not (0 <= n <= 100):
raise ValueError(f"NLP_FUZZY_THRESHOLD must be between 0 and 100, got: {n}")
return n
def _load_persons_from_db(db_url: str) -> list[tuple[str | None, str | None]]:
    """Fetch every (first_name, last_name) pair from the ``persons`` table.

    Args:
        db_url: Connection string handed to ``psycopg2.connect``.

    Returns:
        All rows returned by the query, one ``(first_name, last_name)``
        tuple per row.

    Raises:
        Whatever ``psycopg2`` raises on connection or query failure; the
        connection is closed in every case.
    """
    import psycopg2  # deferred — not available in test environments without a DB

    conn = psycopg2.connect(db_url)
    try:
        # The cursor context manager closes the cursor on exit; the connection
        # itself is still closed by the ``finally`` below.
        with conn.cursor() as cur:
            cur.execute("SELECT first_name, last_name FROM persons")
            return cur.fetchall()
    finally:
        conn.close()
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Configure the extractor at startup, then yield control to the app.

    Reads NLP_FUZZY_THRESHOLD (falling back to the module default), validates
    it and applies it. If no person matcher is registered yet, a new one is
    created, filled from the database named by DATABASE_URL when that is set,
    and registered. The matcher is registered even when DATABASE_URL is
    missing or loading fails; those cases are only logged.
    """
    raw_threshold = os.environ.get("NLP_FUZZY_THRESHOLD", str(_DEFAULT_FUZZY_THRESHOLD))
    set_fuzzy_threshold(_parse_fuzzy_threshold(raw_threshold))

    # Only initialise the matcher when nothing was pre-seeded (e.g., by tests).
    if get_person_matcher() is None:
        matcher = PersonMatcher()
        database_url = os.environ.get("DATABASE_URL")
        if not database_url:
            logger.warning("DATABASE_URL not set — person matching disabled")
        else:
            try:
                matcher.load(_load_persons_from_db(database_url))
                logger.info("PersonMatcher loaded %d name variants from DB", len(matcher))
            except Exception:
                # Startup continues; the failure is logged with its traceback.
                logger.exception("Failed to load persons from DB — person matching disabled")
        set_person_matcher(matcher)

    yield
# Application instance; `lifespan` performs the startup configuration before requests are served.
app = FastAPI(lifespan=lifespan)
@app.get("/health")
def health() -> dict:
    """Return a status payload with the size of the registered person matcher.

    ``persons_loaded`` is ``len()`` of the current matcher, or 0 when the
    matcher is falsy (for example, when none is registered).
    """
    matcher = get_person_matcher()
    if matcher:
        loaded = len(matcher)
    else:
        loaded = 0
    return {"status": "ok", "persons_loaded": loaded}
@app.post("/parse", response_model=ParseResponse)
def parse(request: ParseRequest) -> ParseResponse:
    """Run extraction on the request's query and language.

    Args:
        request: Parsed request body; its ``query`` and ``lang`` fields are
            passed to ``extract``.

    Returns:
        Whatever ``extract`` returns for the given query and language.

    Raises:
        HTTPException: With status 500 and a generic detail message when
            ``extract`` raises any exception.
    """
    try:
        return extract(request.query, request.lang)
    except Exception as exc:
        # Keep the traceback in the server log; the client only ever sees the
        # generic message below. The query text is deliberately not logged.
        logger.exception("Unhandled error while handling /parse")
        raise HTTPException(status_code=500, detail="internal error") from exc