feat(normalizer): add SPOUSE_OF resolution to persons_tree
This commit is contained in:
@@ -207,3 +207,38 @@ def _deduplicate(persons: list[dict]) -> tuple[list[dict], list[str]]:
|
|||||||
result.append(p)
|
result.append(p)
|
||||||
|
|
||||||
return result, skipped
|
return result, skipped
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_spouses(
|
||||||
|
persons: list[dict], index: dict[str, list[str]]
|
||||||
|
) -> tuple[list[dict], list[dict]]:
|
||||||
|
"""Emit SPOUSE_OF edges from each person's _spouse_raw field."""
|
||||||
|
relationships: list[dict] = []
|
||||||
|
unresolved: list[dict] = []
|
||||||
|
emitted: set[frozenset] = set()
|
||||||
|
|
||||||
|
for p in persons:
|
||||||
|
raw = (p.get("_spouse_raw") or "").strip()
|
||||||
|
if not raw:
|
||||||
|
continue
|
||||||
|
row_id = p["rowId"]
|
||||||
|
matched_id, reason = _resolve_one(raw, index)
|
||||||
|
if matched_id:
|
||||||
|
edge = frozenset([row_id, matched_id])
|
||||||
|
if edge not in emitted:
|
||||||
|
emitted.add(edge)
|
||||||
|
relationships.append({
|
||||||
|
"personId": row_id,
|
||||||
|
"relatedPersonId": matched_id,
|
||||||
|
"type": "SPOUSE_OF",
|
||||||
|
"source": "verheiratet_mit",
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
unresolved.append({
|
||||||
|
"rowId": row_id,
|
||||||
|
"field": "verheiratet_mit",
|
||||||
|
"raw": raw,
|
||||||
|
"reason": reason,
|
||||||
|
})
|
||||||
|
|
||||||
|
return relationships, unresolved
|
||||||
|
|||||||
@@ -273,3 +273,48 @@ def test_deduplicate_both_none_birth_year_kept():
|
|||||||
result, skipped = persons_tree._deduplicate(persons)
|
result, skipped = persons_tree._deduplicate(persons)
|
||||||
assert [p["rowId"] for p in result] == ["row_A"]
|
assert [p["rowId"] for p in result] == ["row_A"]
|
||||||
assert len(skipped) == 1
|
assert len(skipped) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def _make_persons(*args):
|
||||||
|
"""Helper: args are (rowId, firstName, lastName, maidenName, spouse_raw) tuples."""
|
||||||
|
return [
|
||||||
|
{"rowId": a[0], "firstName": a[1], "lastName": a[2], "maidenName": a[3],
|
||||||
|
"_spouse_raw": a[4], "_bemerkung_raw": None,
|
||||||
|
"birthYear": None, "deathYear": None, "birthPlace": None, "deathPlace": None,
|
||||||
|
"generation": None, "familyMember": True, "alias": None, "notes": None}
|
||||||
|
for a in args
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_spouses_success():
    """Two persons naming each other as spouse yield exactly one SPOUSE_OF edge."""
    couple = _make_persons(
        ("row_002", "Elsgard", "Allemeyer", "Wöhler", "Allemeyer Werner"),
        ("row_003", "Werner", "Allemeyer", None, "Elsgard Wöhler"),
    )
    name_index = persons_tree._build_index(couple)

    relationships, unresolved = persons_tree._resolve_spouses(couple, name_index)

    # Both rows point at each other, but only one edge may be emitted.
    assert len(relationships) == 1
    edge = relationships[0]
    assert edge["type"] == "SPOUSE_OF"
    # The direction of the edge is not pinned, only the pair of endpoints.
    assert {edge["personId"], edge["relatedPersonId"]} == {"row_002", "row_003"}
    assert unresolved == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_spouses_not_found():
    """A spouse text matching nobody is reported as unresolved with 'not_found'."""
    lone_person = _make_persons(
        ("row_007", "Charlotte", "Blomquist", "Ruge", '"Tante Lolly"'),
    )
    name_index = persons_tree._build_index(lone_person)

    relationships, unresolved = persons_tree._resolve_spouses(
        lone_person, name_index
    )

    assert relationships == []
    assert len(unresolved) == 1
    entry = unresolved[0]
    assert entry["rowId"] == "row_007"
    assert entry["reason"] == "not_found"
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_spouses_empty_spouse_field():
    """A person without spouse text produces neither an edge nor an unresolved entry."""
    single = _make_persons(
        ("row_004", "Jürgen", "Allemeyer", None, None),
    )
    name_index = persons_tree._build_index(single)

    relationships, unresolved = persons_tree._resolve_spouses(single, name_index)

    assert relationships == []
    assert unresolved == []
|
||||||
|
|||||||
Reference in New Issue
Block a user