# Files
# familienarchiv/nlp-service/test_main.py
# 2026-06-08 10:56:32 +02:00
#
# 118 lines
# 3.5 KiB
# Python

"""Integration tests for the FastAPI app."""
import pytest
from fastapi.testclient import TestClient
from extractor import set_person_matcher
from person_matcher import PersonMatcher
# Person records handed to ``PersonMatcher.load`` by the ``client`` fixture.
# Each entry looks like a (first name, last name) pair -- confirm against
# ``PersonMatcher.load`` if the expected shape matters.
_TEST_PERSONS = [
    ("Clara", "Cram"),
    ("Herbert", "Cram"),
    ("Eugenie", "de Gruyter"),
    ("Walter", "de Gruyter"),
    ("Marie", "Cram"),
    ("Anita", "Wöhler"),
]
@pytest.fixture(scope="session")
def client():
    """Yield one ``TestClient`` for the whole test session.

    A ``PersonMatcher`` loaded with ``_TEST_PERSONS`` is registered before the
    app is imported and started, so that the app's lifespan does not replace
    it with an empty matcher.
    """
    seeded_matcher = PersonMatcher()
    seeded_matcher.load(_TEST_PERSONS)
    set_person_matcher(seeded_matcher)

    # Deliberately imported here, after the matcher has been registered.
    from main import app

    with TestClient(app) as test_client:
        yield test_client
def test_health(client):
    """``GET /health`` returns 200, status ``"ok"`` and a positive person count."""
    response = client.get("/health")
    assert response.status_code == 200

    body = response.json()
    assert body["status"] == "ok"
    assert body["persons_loaded"] > 0
def test_parse_returns_200_with_all_fields(client):
    """A simple German query yields 200 and every expected response field."""
    request_body = {"query": "Briefe vor 1920", "lang": "de"}
    response = client.post("/parse", json=request_body)
    assert response.status_code == 200

    parsed = response.json()
    assert "personNames" in parsed
    assert parsed["personRole"] in ("sender", "receiver", "any")
    # Presence checks for the remaining structural fields.
    for field in ("dateFrom", "dateTo", "keywords"):
        assert field in parsed
    # The raw query is echoed back and "vor 1920" maps to the end of 1920.
    assert parsed["rawQuery"] == "Briefe vor 1920"
    assert parsed["dateTo"] == "1920-12-31"
def test_parse_person_with_date(client):
    """Two known persons and a year are all extracted from one query."""
    query = "Briefe von Clara Cram an Walter de Gruyter im Jahr 1920"
    response = client.post("/parse", json={"query": query, "lang": "de"})
    assert response.status_code == 200

    parsed = response.json()
    for expected_name in ("Clara Cram", "Walter de Gruyter"):
        assert expected_name in parsed["personNames"]
    # "im Jahr 1920" is expected to span the full calendar year.
    assert parsed["dateFrom"] == "1920-01-01"
    assert parsed["dateTo"] == "1920-12-31"
def test_parse_unknown_lang_returns_422(client):
    """A ``lang`` value of ``"fr"`` is rejected with 422."""
    request_body = {"query": "test", "lang": "fr"}
    response = client.post("/parse", json=request_body)
    assert response.status_code == 422
def test_parse_missing_query_returns_422(client):
    """A request body without ``query`` is rejected with 422."""
    request_body = {"lang": "de"}
    response = client.post("/parse", json=request_body)
    assert response.status_code == 422
def test_parse_all_languages(client):
    """The same "before 1920" query resolves identically in de, en and es."""
    # Insertion order is preserved, so languages are exercised as de, en, es.
    queries_by_lang = {
        "de": "Briefe vor 1920",
        "en": "letters before 1920",
        "es": "cartas antes de 1920",
    }
    for lang, query in queries_by_lang.items():
        response = client.post("/parse", json={"query": query, "lang": lang})
        assert response.status_code == 200, f"Failed for lang={lang}"
        date_to = response.json()["dateTo"]
        assert date_to == "1920-12-31", f"Wrong dateTo for lang={lang}"
def test_fuzzy_threshold_valid_range():
    """``_parse_fuzzy_threshold`` converts "80", "0" and "100" to ints."""
    from main import _parse_fuzzy_threshold

    accepted = (("80", 80), ("0", 0), ("100", 100))
    for raw_value, expected in accepted:
        assert _parse_fuzzy_threshold(raw_value) == expected
def test_fuzzy_threshold_out_of_range_raises():
    """``_parse_fuzzy_threshold`` raises ``ValueError`` for "101", "-1" and "abc"."""
    from main import _parse_fuzzy_threshold

    # Above the range, below the range, and not a number at all.
    for rejected_value in ("101", "-1", "abc"):
        with pytest.raises(ValueError):
            _parse_fuzzy_threshold(rejected_value)
def test_parse_exceeds_max_length_returns_422(client):
    """A 501-character query is rejected with 422."""
    # NOTE(review): presumably the limit is 500 characters -- confirm against
    # the request model in ``main``.
    oversized_query = "a" * 501
    response = client.post("/parse", json={"query": oversized_query, "lang": "de"})
    assert response.status_code == 422
def test_parse_internal_exception_does_not_leak_detail(client, monkeypatch):
    """An unexpected failure yields a generic 500 body without internal details."""
    import main as main_module

    def _raise_with_secret(query, lang):
        # The message mimics a connection string so a leak would be obvious.
        raise RuntimeError("postgresql://archive_user:s3cr3t@db:5432/family_archive_db")

    # Replace the ``extract`` name in ``main`` so the endpoint hits the failure.
    monkeypatch.setattr(main_module, "extract", _raise_with_secret)

    response = client.post("/parse", json={"query": "test", "lang": "de"})
    assert response.status_code == 500
    assert "s3cr3t" not in response.text
    assert response.json()["detail"] == "internal error"