80 lines
2.5 KiB
Python
80 lines
2.5 KiB
Python
"""FastAPI app — /parse and /health endpoints."""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
from contextlib import asynccontextmanager
|
|
|
|
from fastapi import FastAPI, HTTPException
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
from extractor import extract, get_person_matcher, set_fuzzy_threshold, set_person_matcher
|
|
from models import ParseRequest, ParseResponse
|
|
from person_matcher import PersonMatcher
|
|
|
|
# Fallback fuzzy-match threshold, used by lifespan() when the
# NLP_FUZZY_THRESHOLD environment variable is not set.
_DEFAULT_FUZZY_THRESHOLD = 80
|
|
|
|
|
|
def _parse_fuzzy_threshold(val: str) -> int:
|
|
"""Parse and validate NLP_FUZZY_THRESHOLD — must be integer in [0, 100]."""
|
|
try:
|
|
n = int(val)
|
|
except ValueError:
|
|
raise ValueError(f"NLP_FUZZY_THRESHOLD must be an integer, got: {val!r}")
|
|
if not (0 <= n <= 100):
|
|
raise ValueError(f"NLP_FUZZY_THRESHOLD must be between 0 and 100, got: {n}")
|
|
return n
|
|
|
|
|
|
def _load_persons_from_db(db_url: str) -> list[tuple[str | None, str | None]]:
    """Fetch ``(first_name, last_name)`` for every row of the ``persons`` table.

    Args:
        db_url: Connection string handed unchanged to ``psycopg2.connect``.

    Returns:
        The result of ``cursor.fetchall()`` for the query, one tuple per row.

    Raises:
        Any exception raised by ``psycopg2`` while connecting or querying is
        propagated to the caller; the connection is closed either way.
    """
    import psycopg2  # deferred — not available in test environments without a DB
    from contextlib import closing

    # closing() guarantees .close() on exit and yields the object itself, so
    # the cursor is released first, then the connection, even on error.
    with closing(psycopg2.connect(db_url)) as conn, closing(conn.cursor()) as cur:
        cur.execute("SELECT first_name, last_name FROM persons")
        return cur.fetchall()
|
|
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Configure the extractor at startup, then hand control to the app.

    Reads NLP_FUZZY_THRESHOLD (falling back to the module default), validates
    it and applies it. If no person matcher has been registered yet, builds a
    ``PersonMatcher``, fills it from the database named by DATABASE_URL when
    that variable is set, and registers it. A failed DB load is logged and
    does not abort startup; an invalid threshold does raise.
    """
    raw_threshold = os.environ.get("NLP_FUZZY_THRESHOLD", str(_DEFAULT_FUZZY_THRESHOLD))
    set_fuzzy_threshold(_parse_fuzzy_threshold(raw_threshold))

    # Only initialise the matcher when nothing was pre-seeded (e.g., by tests).
    if get_person_matcher() is None:
        matcher = PersonMatcher()
        database_url = os.environ.get("DATABASE_URL")
        if not database_url:
            logger.warning("DATABASE_URL not set — person matching disabled")
        else:
            try:
                matcher.load(_load_persons_from_db(database_url))
                logger.info("PersonMatcher loaded %d name variants from DB", len(matcher))
            except Exception:
                # Best effort: keep serving with whatever the matcher holds.
                logger.error("Failed to load persons from DB — person matching disabled", exc_info=True)
        # Registered regardless of whether the DB load succeeded.
        set_person_matcher(matcher)

    yield
|
|
|
|
|
|
# Application object; `lifespan` is supplied as its lifespan handler, and the
# route decorators below register their endpoints on it.
app = FastAPI(lifespan=lifespan)
|
|
|
|
|
|
@app.get("/health")
def health() -> dict:
    """Report service status and the size of the current person matcher.

    Returns:
        A dict with ``"status"`` fixed to ``"ok"`` and ``"persons_loaded"``
        set to ``len()`` of the registered matcher, or 0 when the matcher is
        missing or falsy.
    """
    matcher = get_person_matcher()
    if matcher:
        loaded = len(matcher)
    else:
        loaded = 0
    return {"status": "ok", "persons_loaded": loaded}
|
|
|
|
|
|
@app.post("/parse", response_model=ParseResponse)
def parse(request: ParseRequest) -> ParseResponse:
    """Run extraction on the submitted query and return the result.

    Args:
        request: Request body; its ``query`` and ``lang`` fields are passed
            to ``extract`` in that order.

    Returns:
        Whatever ``extract`` returns for the given query and language.

    Raises:
        HTTPException: Status 500 with detail ``"internal error"`` when
            ``extract`` raises any exception.
    """
    try:
        return extract(request.query, request.lang)
    except Exception as exc:
        # The client only sees a generic 500, so record the real failure
        # (with traceback) here; otherwise the root cause is lost.
        logger.exception("Unhandled error while handling /parse")
        raise HTTPException(status_code=500, detail="internal error") from exc
|