feat(normalizer): add PARENT_OF Bemerkung extraction to persons_tree

This commit is contained in:
Marcel
2026-05-25 21:06:24 +02:00
parent fa4b6b5fc2
commit 6f55489ec2
2 changed files with 164 additions and 0 deletions

View File

@@ -318,3 +318,103 @@ def test_resolve_spouses_empty_spouse_field():
idx = persons_tree._build_index(persons)
rels, unres = persons_tree._resolve_spouses(persons, idx)
assert rels == [] and unres == []
def _register(*args):
"""Build index from (rowId, first, last, maiden) tuples."""
persons = [
{"rowId": a[0], "firstName": a[1], "lastName": a[2], "maidenName": a[3]}
for a in args
]
return persons, persons_tree._build_index(persons)
def test_parse_bemerkung_sohn_two_parents():
_, idx = _register(
("row_019", "Clara", "Cram", "de Gruyter"),
("row_028", "Herbert", "Cram", None),
)
rels, unres, notes = persons_tree._parse_bemerkung(
"row_021", "Sohn v Clara u Herbert", idx
)
assert len(rels) == 2
assert all(r["type"] == "PARENT_OF" for r in rels)
child_ids = {r["relatedPersonId"] for r in rels}
parent_ids = {r["personId"] for r in rels}
assert child_ids == {"row_021"}
assert "row_019" in parent_ids and "row_028" in parent_ids
assert unres == []
assert notes == ""
def test_parse_bemerkung_tochter_von():
_, idx = _register(("row_019", "Clara", "Cram", None))
rels, unres, notes = persons_tree._parse_bemerkung(
"row_036", "Tochter von Clara Cram", idx
)
assert len(rels) == 1
assert rels[0] == {
"personId": "row_019",
"relatedPersonId": "row_036",
"type": "PARENT_OF",
"source": "bemerkung",
"rawBemerkung": "Tochter von Clara Cram",
}
assert notes == ""
def test_parse_bemerkung_vater():
_, idx = _register(("row_028", "Herbert", "Cram", None))
rels, unres, notes = persons_tree._parse_bemerkung(
"row_031", "Vater v Herbert", idx
)
assert len(rels) == 1
assert rels[0]["personId"] == "row_031"
assert rels[0]["relatedPersonId"] == "row_028"
assert rels[0]["type"] == "PARENT_OF"
def test_parse_bemerkung_unmatched_parent_name():
_, idx = _register() # empty index
rels, unres, notes = persons_tree._parse_bemerkung(
"row_004", "Sohn v Elsgard A.", idx
)
assert rels == []
assert len(unres) == 1
assert unres[0]["reason"] == "not_found"
assert notes == ""
def test_parse_bemerkung_skip_nichte():
_, idx = _register(("row_028", "Herbert", "Cram", None))
rels, unres, notes = persons_tree._parse_bemerkung(
"row_002", "Nichte von Herbert", idx
)
assert rels == []
assert unres == []
assert notes == "Nichte von Herbert"
def test_parse_bemerkung_skip_bruder():
_, idx = _register(("row_028", "Herbert", "Cram", None))
rels, unres, notes = persons_tree._parse_bemerkung(
"row_033", "Bruder v Herbert", idx
)
assert rels == []
assert unres == []
assert notes == "Bruder v Herbert"
def test_parse_bemerkung_empty():
_, idx = _register()
rels, unres, notes = persons_tree._parse_bemerkung("row_004", "", idx)
assert rels == [] and unres == [] and notes == ""
def test_parse_bemerkung_plain_remark():
_, idx = _register()
rels, unres, notes = persons_tree._parse_bemerkung(
"row_029", "Verfasserin der Cram-Chronik !!", idx
)
assert rels == [] and unres == []
assert notes == "Verfasserin der Cram-Chronik !!"