118 lines
3.5 KiB
Python
118 lines
3.5 KiB
Python
"""Integration tests for the FastAPI app."""
|
|
import pytest
|
|
from fastapi.testclient import TestClient
|
|
|
|
from extractor import set_person_matcher
|
|
from person_matcher import PersonMatcher
|
|
|
|
# (first name, last name) pairs loaded into the PersonMatcher by the
# ``client`` fixture; the parse tests below query some of these names.
_TEST_PERSONS = [
    # Cram
    ("Clara", "Cram"),
    ("Herbert", "Cram"),
    # de Gruyter
    ("Eugenie", "de Gruyter"),
    ("Walter", "de Gruyter"),
    # Cram (continued)
    ("Marie", "Cram"),
    # Last name containing a non-ASCII character
    ("Anita", "Wöhler"),
]
|
|
|
|
|
|
@pytest.fixture(scope="session")
def client():
    """Yield one ``TestClient`` for the whole session, with a seeded matcher.

    A ``PersonMatcher`` loaded with ``_TEST_PERSONS`` is registered through
    ``set_person_matcher`` before the app is imported and started, so the
    app lifespan does not overwrite it with an empty one.
    """
    seeded_matcher = PersonMatcher()
    seeded_matcher.load(_TEST_PERSONS)
    set_person_matcher(seeded_matcher)

    # Keep this import after the seeding above.
    from main import app

    # The context manager form keeps the client open for the fixture's lifetime.
    with TestClient(app) as test_client:
        yield test_client
|
|
|
|
|
|
def test_health(client):
    """GET /health answers 200, reports status "ok" and a non-zero person count."""
    response = client.get("/health")
    assert response.status_code == 200

    payload = response.json()
    assert payload["status"] == "ok"
    assert payload["persons_loaded"] > 0
|
|
|
|
|
|
def test_parse_returns_200_with_all_fields(client):
    """POST /parse returns 200 and every expected field for a simple query."""
    response = client.post(
        "/parse",
        json={"query": "Briefe vor 1920", "lang": "de"},
    )
    assert response.status_code == 200

    body = response.json()
    assert "personNames" in body
    assert body["personRole"] in ("sender", "receiver", "any")
    for field in ("dateFrom", "dateTo", "keywords"):
        assert field in body

    # The query is echoed back unchanged and "vor 1920" becomes the upper bound.
    assert body["rawQuery"] == "Briefe vor 1920"
    assert body["dateTo"] == "1920-12-31"
|
|
|
|
|
|
def test_parse_person_with_date(client):
    """Both person names and the full-year date range come back from one query."""
    payload = {
        "query": "Briefe von Clara Cram an Walter de Gruyter im Jahr 1920",
        "lang": "de",
    }
    response = client.post("/parse", json=payload)
    assert response.status_code == 200

    body = response.json()
    names = body["personNames"]
    assert "Clara Cram" in names
    assert "Walter de Gruyter" in names

    # "im Jahr 1920" is expected to span the whole calendar year.
    assert body["dateFrom"] == "1920-01-01"
    assert body["dateTo"] == "1920-12-31"
|
|
|
|
|
|
def test_parse_unknown_lang_returns_422(client):
    """POST /parse with lang "fr" is rejected with 422."""
    payload = {"query": "test", "lang": "fr"}
    response = client.post("/parse", json=payload)
    assert response.status_code == 422
|
|
|
|
|
|
def test_parse_missing_query_returns_422(client):
    """POST /parse without a "query" key is rejected with 422."""
    payload = {"lang": "de"}
    response = client.post("/parse", json=payload)
    assert response.status_code == 422
|
|
|
|
|
|
def test_parse_all_languages(client):
    """The "before 1920" query yields the same ``dateTo`` in de, en and es."""
    # Insertion-ordered, so the languages are exercised as de, en, es.
    query_by_lang = {
        "de": "Briefe vor 1920",
        "en": "letters before 1920",
        "es": "cartas antes de 1920",
    }
    for lang, query in query_by_lang.items():
        response = client.post("/parse", json={"query": query, "lang": lang})
        assert response.status_code == 200, f"Failed for lang={lang}"
        assert response.json()["dateTo"] == "1920-12-31", f"Wrong dateTo for lang={lang}"
|
|
|
|
|
|
def test_fuzzy_threshold_valid_range():
    """The strings "80", "0" and "100" parse to the matching integers."""
    from main import _parse_fuzzy_threshold

    accepted = (("80", 80), ("0", 0), ("100", 100))
    for raw, expected in accepted:
        assert _parse_fuzzy_threshold(raw) == expected
|
|
|
|
|
|
def test_fuzzy_threshold_out_of_range_raises():
    """The strings "101", "-1" and "abc" each raise ``ValueError``."""
    from main import _parse_fuzzy_threshold

    # One ``raises`` context per input so every value must fail on its own.
    for rejected in ("101", "-1", "abc"):
        with pytest.raises(ValueError):
            _parse_fuzzy_threshold(rejected)
|
|
|
|
|
|
def test_parse_exceeds_max_length_returns_422(client):
    """A 501-character query is rejected with 422."""
    # NOTE(review): 501 is presumably one past the configured maximum; confirm in main.
    oversized_query = "a" * 501
    response = client.post("/parse", json={"query": oversized_query, "lang": "de"})
    assert response.status_code == 422
|
|
|
|
|
|
def test_parse_internal_exception_does_not_leak_detail(client, monkeypatch):
    """A failure inside ``extract`` must surface as a generic 500.

    The stubbed exception message carries a credential-bearing connection
    string; neither it nor any other internal detail may reach the response.
    """
    import main as app_module

    def _raise_with_secret(query, lang):
        # Stand-in for ``main.extract`` that always fails with a sensitive message.
        raise RuntimeError("postgresql://archive_user:s3cr3t@db:5432/family_archive_db")

    monkeypatch.setattr(app_module, "extract", _raise_with_secret)

    response = client.post("/parse", json={"query": "test", "lang": "de"})
    assert response.status_code == 500
    assert "s3cr3t" not in response.text
    assert response.json()["detail"] == "internal error"
|