"""Integration tests for the FastAPI app.""" import pytest from fastapi.testclient import TestClient from extractor import set_person_matcher from person_matcher import PersonMatcher _TEST_PERSONS = [ ("Clara", "Cram"), ("Herbert", "Cram"), ("Eugenie", "de Gruyter"), ("Walter", "de Gruyter"), ("Marie", "Cram"), ("Anita", "Wöhler"), ] @pytest.fixture(scope="session") def client(): # Pre-seed the matcher so the lifespan doesn't overwrite it with an empty one. m = PersonMatcher() m.load(_TEST_PERSONS) set_person_matcher(m) from main import app with TestClient(app) as c: yield c def test_health(client): r = client.get("/health") assert r.status_code == 200 assert r.json()["status"] == "ok" assert r.json()["persons_loaded"] > 0 def test_parse_returns_200_with_all_fields(client): r = client.post("/parse", json={"query": "Briefe vor 1920", "lang": "de"}) assert r.status_code == 200 d = r.json() assert "personNames" in d assert d["personRole"] in ("sender", "receiver", "any") assert "dateFrom" in d assert "dateTo" in d assert "keywords" in d assert d["rawQuery"] == "Briefe vor 1920" assert d["dateTo"] == "1920-12-31" def test_parse_person_with_date(client): r = client.post( "/parse", json={"query": "Briefe von Clara Cram an Walter de Gruyter im Jahr 1920", "lang": "de"}, ) assert r.status_code == 200 d = r.json() assert "Clara Cram" in d["personNames"] assert "Walter de Gruyter" in d["personNames"] assert d["dateFrom"] == "1920-01-01" assert d["dateTo"] == "1920-12-31" def test_parse_unknown_lang_returns_422(client): r = client.post("/parse", json={"query": "test", "lang": "fr"}) assert r.status_code == 422 def test_parse_missing_query_returns_422(client): r = client.post("/parse", json={"lang": "de"}) assert r.status_code == 422 def test_parse_all_languages(client): cases = [ ("de", "Briefe vor 1920"), ("en", "letters before 1920"), ("es", "cartas antes de 1920"), ] for lang, query in cases: r = client.post("/parse", json={"query": query, "lang": lang}) assert r.status_code == 200, f"Failed for lang={lang}" assert r.json()["dateTo"] == "1920-12-31", f"Wrong dateTo for lang={lang}" def test_fuzzy_threshold_valid_range(): from main import _parse_fuzzy_threshold assert _parse_fuzzy_threshold("80") == 80 assert _parse_fuzzy_threshold("0") == 0 assert _parse_fuzzy_threshold("100") == 100 def test_fuzzy_threshold_out_of_range_raises(): from main import _parse_fuzzy_threshold with pytest.raises(ValueError): _parse_fuzzy_threshold("101") with pytest.raises(ValueError): _parse_fuzzy_threshold("-1") with pytest.raises(ValueError): _parse_fuzzy_threshold("abc") def test_parse_exceeds_max_length_returns_422(client): r = client.post("/parse", json={"query": "a" * 501, "lang": "de"}) assert r.status_code == 422 def test_parse_internal_exception_does_not_leak_detail(client, monkeypatch): """500 errors must return generic message — never expose internal details.""" import main as main_module def _boom(query, lang): raise RuntimeError("postgresql://archive_user:s3cr3t@db:5432/family_archive_db") monkeypatch.setattr(main_module, "extract", _boom) r = client.post("/parse", json={"query": "test", "lang": "de"}) assert r.status_code == 500 assert "s3cr3t" not in r.text assert r.json()["detail"] == "internal error"