# familienarchiv/nlp-service/main.py

"""FastAPI app — /parse and /health endpoints."""
from __future__ import annotations

import os
from contextlib import asynccontextmanager

from fastapi import FastAPI, HTTPException

from extractor import extract, get_person_matcher, set_person_matcher
from models import ParseRequest, ParseResponse
from person_matcher import PersonMatcher


def _load_persons_from_db(db_url: str) -> list[tuple[str | None, str | None]]:
    """Fetch every ``(first_name, last_name)`` pair from the ``persons`` table.

    Args:
        db_url: Connection string handed to ``psycopg2.connect``.

    Returns:
        All rows of the query, exactly as returned by ``cursor.fetchall()``.
    """
    from contextlib import closing

    import psycopg2  # deferred — not available in test environments without a DB

    # ``closing`` is used instead of ``with conn:`` because psycopg2's own
    # connection context manager only ends the transaction; it does not close
    # the connection. Here both the cursor and the connection are released,
    # even when the query raises.
    with closing(psycopg2.connect(db_url)) as conn, closing(conn.cursor()) as cur:
        cur.execute("SELECT first_name, last_name FROM persons")
        return cur.fetchall()


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Install a person matcher before the application starts serving.

    If a matcher has already been registered (e.g., pre-seeded by tests) it is
    left untouched. Otherwise a new ``PersonMatcher`` is created, filled from
    the database when the ``DATABASE_URL`` environment variable is set, and
    registered via ``set_person_matcher``.
    """
    already_seeded = get_person_matcher() is not None
    if not already_seeded:
        matcher = PersonMatcher()
        database_url = os.environ.get("DATABASE_URL")
        if database_url:
            # Without a configured database the matcher is registered unloaded.
            matcher.load(_load_persons_from_db(database_url))
        set_person_matcher(matcher)
    yield


# Application instance; ``lifespan`` is registered as its lifespan handler.
app = FastAPI(lifespan=lifespan)


@app.get("/health")
def health() -> dict:
    """Report service status and the size of the registered person matcher.

    Returns:
        A dict with ``"status"`` set to ``"ok"`` and ``"persons_loaded"`` set
        to ``len()`` of the current matcher, or ``0`` when the matcher is
        missing or falsy.
    """
    matcher = get_person_matcher()
    if matcher:
        loaded = len(matcher)
    else:
        loaded = 0
    return {"status": "ok", "persons_loaded": loaded}


@app.post("/parse", response_model=ParseResponse)
def parse(request: ParseRequest) -> ParseResponse:
    """Run the extractor on the submitted query.

    Args:
        request: Request body; its ``query`` and ``lang`` fields are passed to
            ``extract``.

    Returns:
        The value returned by ``extract`` for the given query and language.

    Raises:
        HTTPException: Re-raised unchanged when ``extract`` itself raised one;
            for any other exception, raised with status 500 and the
            stringified error as ``detail``.
    """
    try:
        return extract(request.query, request.lang)
    except HTTPException:
        # Already an HTTP-level error — keep its status code instead of
        # masking it as a generic 500.
        raise
    except Exception as exc:
        # NOTE(review): ``str(exc)`` exposes internal error text to the
        # client; kept as-is so existing consumers see the same payload.
        raise HTTPException(status_code=500, detail=str(exc)) from exc