"""FastAPI app — /parse and /health endpoints.""" from __future__ import annotations import logging import os from contextlib import asynccontextmanager from fastapi import FastAPI, HTTPException logger = logging.getLogger(__name__) from extractor import extract, get_person_matcher, set_fuzzy_threshold, set_person_matcher from models import ParseRequest, ParseResponse from person_matcher import PersonMatcher _DEFAULT_FUZZY_THRESHOLD = 80 def _parse_fuzzy_threshold(val: str) -> int: """Parse and validate NLP_FUZZY_THRESHOLD — must be integer in [0, 100].""" try: n = int(val) except ValueError: raise ValueError(f"NLP_FUZZY_THRESHOLD must be an integer, got: {val!r}") if not (0 <= n <= 100): raise ValueError(f"NLP_FUZZY_THRESHOLD must be between 0 and 100, got: {n}") return n def _load_persons_from_db(db_url: str) -> list[tuple[str | None, str | None]]: import psycopg2 # deferred — not available in test environments without a DB conn = psycopg2.connect(db_url) try: cur = conn.cursor() cur.execute("SELECT first_name, last_name FROM persons") return cur.fetchall() finally: conn.close() @asynccontextmanager async def lifespan(app: FastAPI): threshold_raw = os.environ.get("NLP_FUZZY_THRESHOLD", str(_DEFAULT_FUZZY_THRESHOLD)) threshold = _parse_fuzzy_threshold(threshold_raw) set_fuzzy_threshold(threshold) # Only initialise the matcher when nothing was pre-seeded (e.g., by tests). if get_person_matcher() is None: m = PersonMatcher() db_url = os.environ.get("DATABASE_URL") if db_url: try: rows = _load_persons_from_db(db_url) m.load(rows) logger.info("PersonMatcher loaded %d name variants from DB", len(m)) except Exception: logger.error("Failed to load persons from DB — person matching disabled", exc_info=True) else: logger.warning("DATABASE_URL not set — person matching disabled") set_person_matcher(m) yield app = FastAPI(lifespan=lifespan) @app.get("/health") def health() -> dict: m = get_person_matcher() return {"status": "ok", "persons_loaded": len(m) if m else 0} @app.post("/parse", response_model=ParseResponse) def parse(request: ParseRequest) -> ParseResponse: try: return extract(request.query, request.lang) except Exception as exc: raise HTTPException(status_code=500, detail="internal error") from exc