feat(normalizer): add PARENT_OF Bemerkung extraction to persons_tree
This commit is contained in:
@@ -318,3 +318,103 @@ def test_resolve_spouses_empty_spouse_field():
|
||||
idx = persons_tree._build_index(persons)
|
||||
rels, unres = persons_tree._resolve_spouses(persons, idx)
|
||||
assert rels == [] and unres == []
|
||||
|
||||
|
||||
def _register(*args):
    """Turn positional person tuples into records plus a lookup index.

    Each positional argument is indexed as
    ``(rowId, firstName, lastName, maidenName)``.  Returns a 2-tuple of the
    person dicts and whatever ``persons_tree._build_index`` produces for
    them.  Calling with no arguments yields an empty person list.
    """
    persons = []
    for entry in args:
        persons.append(
            {
                "rowId": entry[0],
                "firstName": entry[1],
                "lastName": entry[2],
                "maidenName": entry[3],
            }
        )
    index = persons_tree._build_index(persons)
    return persons, index
|
||||
|
||||
|
||||
def test_parse_bemerkung_sohn_two_parents():
    """A 'Sohn v A u B' remark yields one PARENT_OF relation per parent."""
    _, index = _register(
        ("row_019", "Clara", "Cram", "de Gruyter"),
        ("row_028", "Herbert", "Cram", None),
    )
    remark = "Sohn v Clara u Herbert"

    relations, unresolved, notes = persons_tree._parse_bemerkung(
        "row_021", remark, index
    )

    assert len(relations) == 2
    for relation in relations:
        assert relation["type"] == "PARENT_OF"

    # The row carrying the remark is the child; the named persons are parents.
    children = {relation["relatedPersonId"] for relation in relations}
    parents = {relation["personId"] for relation in relations}
    assert children == {"row_021"}
    assert "row_019" in parents
    assert "row_028" in parents

    assert unresolved == []
    assert notes == ""
|
||||
|
||||
|
||||
def test_parse_bemerkung_tochter_von():
    """A 'Tochter von <full name>' remark yields the full PARENT_OF record."""
    remark = "Tochter von Clara Cram"
    _, index = _register(("row_019", "Clara", "Cram", None))

    relations, _unresolved, notes = persons_tree._parse_bemerkung(
        "row_036", remark, index
    )

    # Every field of the emitted relation is pinned, including provenance.
    expected = {
        "personId": "row_019",
        "relatedPersonId": "row_036",
        "type": "PARENT_OF",
        "source": "bemerkung",
        "rawBemerkung": "Tochter von Clara Cram",
    }
    assert len(relations) == 1
    assert relations[0] == expected
    assert notes == ""
|
||||
|
||||
|
||||
def test_parse_bemerkung_vater():
    """For 'Vater v <name>' the row carrying the remark is the parent."""
    _, index = _register(("row_028", "Herbert", "Cram", None))

    relations, _unresolved, _notes = persons_tree._parse_bemerkung(
        "row_031", "Vater v Herbert", index
    )

    assert len(relations) == 1
    relation = relations[0]
    # Direction is reversed compared with the Sohn/Tochter remarks.
    assert relation["personId"] == "row_031"
    assert relation["relatedPersonId"] == "row_028"
    assert relation["type"] == "PARENT_OF"
|
||||
|
||||
|
||||
def test_parse_bemerkung_unmatched_parent_name():
    """A parent name missing from the index is reported as 'not_found'."""
    # No persons registered, so the name in the remark cannot be matched.
    _, index = _register()

    relations, unresolved, notes = persons_tree._parse_bemerkung(
        "row_004", "Sohn v Elsgard A.", index
    )

    assert relations == []
    assert len(unresolved) == 1
    assert unresolved[0]["reason"] == "not_found"
    assert notes == ""
|
||||
|
||||
|
||||
def test_parse_bemerkung_skip_nichte():
    """A 'Nichte von ...' remark adds no relation and comes back as notes."""
    _, index = _register(("row_028", "Herbert", "Cram", None))

    relations, unresolved, notes = persons_tree._parse_bemerkung(
        "row_002", "Nichte von Herbert", index
    )

    # Nothing is extracted; the remark text is returned unchanged.
    assert relations == []
    assert unresolved == []
    assert notes == "Nichte von Herbert"
|
||||
|
||||
|
||||
def test_parse_bemerkung_skip_bruder():
    """A 'Bruder v ...' remark adds no relation and comes back as notes."""
    _, index = _register(("row_028", "Herbert", "Cram", None))

    relations, unresolved, notes = persons_tree._parse_bemerkung(
        "row_033", "Bruder v Herbert", index
    )

    # Nothing is extracted; the remark text is returned unchanged.
    assert relations == []
    assert unresolved == []
    assert notes == "Bruder v Herbert"
|
||||
|
||||
|
||||
def test_parse_bemerkung_empty():
    """An empty remark yields no relations, no unresolved items, no notes."""
    _, index = _register()

    relations, unresolved, notes = persons_tree._parse_bemerkung(
        "row_004", "", index
    )

    assert relations == []
    assert unresolved == []
    assert notes == ""
|
||||
|
||||
|
||||
def test_parse_bemerkung_plain_remark():
    """Free text with no kinship phrase is returned verbatim as notes."""
    _, index = _register()

    relations, unresolved, notes = persons_tree._parse_bemerkung(
        "row_029", "Verfasserin der Cram-Chronik !!", index
    )

    assert relations == []
    assert unresolved == []
    assert notes == "Verfasserin der Cram-Chronik !!"
|
||||
|
||||
Reference in New Issue
Block a user