From 740428413027e5f637346352b211e6b8fbe937fe Mon Sep 17 00:00:00 2001
From: Marcel
Date: Sun, 7 Jun 2026 10:29:32 +0200
Subject: [PATCH] feat(nlp-service): FastAPI app with /parse and /health endpoints

Co-Authored-By: Claude Sonnet 4.6
---
 nlp-service/main.py      | 47 +++++++++++++++++++++++++
 nlp-service/test_main.py | 76 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 123 insertions(+)
 create mode 100644 nlp-service/main.py
 create mode 100644 nlp-service/test_main.py

diff --git a/nlp-service/main.py b/nlp-service/main.py
new file mode 100644
index 00000000..c440a1b0
--- /dev/null
+++ b/nlp-service/main.py
@@ -0,0 +1,47 @@
+"""FastAPI entry point for the NLP service: exposes /health and /parse."""
+
+import logging
+from contextlib import asynccontextmanager
+
+from fastapi import FastAPI, HTTPException
+
+from extractor import extract, load_all_models
+from models import ParseRequest, ParseResponse
+
+logger = logging.getLogger(__name__)
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Load the spaCy models once at startup, before requests are served."""
+    logger.info("Loading spaCy models...")
+    load_all_models()
+    logger.info("All models ready.")
+    yield
+
+
+app = FastAPI(lifespan=lifespan)
+
+
+@app.get("/health")
+def health() -> dict:
+    """Return a static payload so callers can check the service is up."""
+    return {"status": "ok"}
+
+
+@app.post("/parse", response_model=ParseResponse)
+def parse(request: ParseRequest) -> ParseResponse:
+    """Run the extraction pipeline on the query in the given language.
+
+    Raises:
+        HTTPException: with status 500 when the extraction pipeline fails.
+    """
+    try:
+        return extract(request.query, request.lang)
+    except Exception as exc:
+        # The traceback goes to the server log only; echoing str(exc) to the
+        # client could expose internal details such as paths or model names.
+        logger.exception("Extraction pipeline failed")
+        raise HTTPException(
+            status_code=500, detail="Extraction pipeline failed"
+        ) from exc
diff --git a/nlp-service/test_main.py b/nlp-service/test_main.py
new file mode 100644
index 00000000..d9382e2d
--- /dev/null
+++ b/nlp-service/test_main.py
@@ -0,0 +1,76 @@
+"""Endpoint tests for the NLP service FastAPI app."""
+
+import pytest
+from fastapi.testclient import TestClient
+
+
+@pytest.fixture(scope="session")
+def client():
+    """Yield one TestClient for the whole session.
+
+    Entering the client as a context manager runs the app's lifespan
+    handler, so the models are loaded once rather than per test.
+    """
+    from main import app
+    with TestClient(app) as c:
+        yield c
+
+
+def test_health(client):
+    response = client.get("/health")
+    assert response.status_code == 200
+    assert response.json() == {"status": "ok"}
+
+
+def test_parse_returns_200_with_all_fields(client):
+    response = client.post("/parse", json={"query": "Briefe vor 1920", "lang": "de"})
+    assert response.status_code == 200
+    data = response.json()
+    assert "personNames" in data
+    assert "personRole" in data
+    assert data["personRole"] in ("sender", "receiver", "any")
+    assert "dateFrom" in data
+    assert "dateTo" in data
+    assert "keywords" in data
+    assert "rawQuery" in data
+    assert data["rawQuery"] == "Briefe vor 1920"
+    assert data["dateTo"] == "1920-12-31"
+
+
+def test_parse_unknown_lang_returns_422(client):
+    response = client.post("/parse", json={"query": "test", "lang": "fr"})
+    assert response.status_code == 422
+
+
+def test_parse_missing_query_returns_422(client):
+    response = client.post("/parse", json={"lang": "de"})
+    assert response.status_code == 422
+
+
+@pytest.mark.parametrize(
+    ("lang", "query"),
+    [
+        ("de", "Briefe vor 1920"),
+        ("en", "letters before 1920"),
+        ("es", "cartas antes de 1920"),
+    ],
+)
+def test_parse_all_languages(client, lang, query):
+    # Parametrized so each language is reported as its own test case and a
+    # failure in one does not hide the results for the others.
+    response = client.post("/parse", json={"query": query, "lang": lang})
+    assert response.status_code == 200, f"Failed for lang={lang}"
+    assert response.json()["dateTo"] == "1920-12-31", f"Wrong dateTo for lang={lang}"
+
+
+def test_parse_pipeline_failure_hides_exception_text(client, monkeypatch):
+    import main
+
+    def failing_extract(query, lang):
+        raise RuntimeError("internal-detail")
+
+    # parse() resolves extract from main's globals at call time.
+    monkeypatch.setattr(main, "extract", failing_extract)
+    response = client.post("/parse", json={"query": "test", "lang": "de"})
+    assert response.status_code == 500
+    assert "internal-detail" not in response.text