feat(normalizer): add SPOUSE_OF resolution to persons_tree
This commit is contained in:
@@ -207,3 +207,38 @@ def _deduplicate(persons: list[dict]) -> tuple[list[dict], list[str]]:
|
||||
result.append(p)
|
||||
|
||||
return result, skipped
|
||||
|
||||
|
||||
def _resolve_spouses(
    persons: list[dict], index: dict[str, list[str]]
) -> tuple[list[dict], list[dict]]:
    """Turn each person's ``_spouse_raw`` value into a SPOUSE_OF edge.

    Persons whose ``_spouse_raw`` is missing, ``None`` or blank after
    stripping are ignored. Every other value is handed to ``_resolve_one``
    together with ``index``:

    * a truthy match id produces one relationship dict per unordered pair
      of row ids -- the first person naming the pair wins, the mirrored
      entry from the partner is dropped;
    * a falsy match id produces an entry in the unresolved list carrying
      the reason reported by ``_resolve_one``.

    Returns:
        A ``(relationships, unresolved)`` tuple; both lists follow the
        order of ``persons``.
    """
    edges: list[dict] = []
    failures: list[dict] = []
    seen_pairs: set[frozenset] = set()

    for person in persons:
        spouse_text = (person.get("_spouse_raw") or "").strip()
        if spouse_text:
            person_id = person["rowId"]
            spouse_id, why = _resolve_one(spouse_text, index)

            if not spouse_id:
                failures.append({
                    "rowId": person_id,
                    "field": "verheiratet_mit",
                    "raw": spouse_text,
                    "reason": why,
                })
                continue

            # The pair is unordered, so A->B and B->A collapse into one edge.
            pair = frozenset((person_id, spouse_id))
            if pair in seen_pairs:
                continue
            seen_pairs.add(pair)
            edges.append({
                "personId": person_id,
                "relatedPersonId": spouse_id,
                "type": "SPOUSE_OF",
                "source": "verheiratet_mit",
            })

    return edges, failures
|
||||
|
||||
@@ -273,3 +273,48 @@ def test_deduplicate_both_none_birth_year_kept():
|
||||
result, skipped = persons_tree._deduplicate(persons)
|
||||
assert [p["rowId"] for p in result] == ["row_A"]
|
||||
assert len(skipped) == 1
|
||||
|
||||
|
||||
def _make_persons(*args):
|
||||
"""Helper: args are (rowId, firstName, lastName, maidenName, spouse_raw) tuples."""
|
||||
return [
|
||||
{"rowId": a[0], "firstName": a[1], "lastName": a[2], "maidenName": a[3],
|
||||
"_spouse_raw": a[4], "_bemerkung_raw": None,
|
||||
"birthYear": None, "deathYear": None, "birthPlace": None, "deathPlace": None,
|
||||
"generation": None, "familyMember": True, "alias": None, "notes": None}
|
||||
for a in args
|
||||
]
|
||||
|
||||
|
||||
def test_resolve_spouses_success():
    """Two persons naming each other as spouse yield a single SPOUSE_OF edge."""
    couple = _make_persons(
        ("row_002", "Elsgard", "Allemeyer", "Wöhler", "Allemeyer Werner"),
        ("row_003", "Werner", "Allemeyer", None, "Elsgard Wöhler"),
    )
    name_index = persons_tree._build_index(couple)

    rels, unres = persons_tree._resolve_spouses(couple, name_index)

    # Both rows point at each other, but only one edge may be emitted.
    assert len(rels) == 1
    edge = rels[0]
    assert edge["type"] == "SPOUSE_OF"
    # Direction is not pinned; only the pair of endpoints matters.
    assert {edge["personId"], edge["relatedPersonId"]} == {"row_002", "row_003"}
    assert unres == []
|
||||
|
||||
|
||||
def test_resolve_spouses_not_found():
    """A spouse string that matches nobody is reported as unresolved."""
    lonely = _make_persons(
        ("row_007", "Charlotte", "Blomquist", "Ruge", '"Tante Lolly"'),
    )
    name_index = persons_tree._build_index(lonely)

    rels, unres = persons_tree._resolve_spouses(lonely, name_index)

    assert rels == []
    assert len(unres) == 1
    entry = unres[0]
    assert entry["rowId"] == "row_007"
    assert entry["reason"] == "not_found"
|
||||
|
||||
|
||||
def test_resolve_spouses_empty_spouse_field():
    """A person without a spouse value yields neither an edge nor an unresolved entry."""
    single = _make_persons(
        ("row_004", "Jürgen", "Allemeyer", None, None),
    )
    name_index = persons_tree._build_index(single)

    rels, unres = persons_tree._resolve_spouses(single, name_index)

    assert rels == []
    assert unres == []
|
||||
|
||||
Reference in New Issue
Block a user