feat(normalizer): add SPOUSE_OF resolution to persons_tree

This commit is contained in:
Marcel
2026-05-25 21:03:46 +02:00
parent 1f2351e3c0
commit fa4b6b5fc2
2 changed files with 80 additions and 0 deletions

View File

@@ -207,3 +207,38 @@ def _deduplicate(persons: list[dict]) -> tuple[list[dict], list[str]]:
result.append(p) result.append(p)
return result, skipped return result, skipped
def _resolve_spouses(
persons: list[dict], index: dict[str, list[str]]
) -> tuple[list[dict], list[dict]]:
"""Emit SPOUSE_OF edges from each person's _spouse_raw field."""
relationships: list[dict] = []
unresolved: list[dict] = []
emitted: set[frozenset] = set()
for p in persons:
raw = (p.get("_spouse_raw") or "").strip()
if not raw:
continue
row_id = p["rowId"]
matched_id, reason = _resolve_one(raw, index)
if matched_id:
edge = frozenset([row_id, matched_id])
if edge not in emitted:
emitted.add(edge)
relationships.append({
"personId": row_id,
"relatedPersonId": matched_id,
"type": "SPOUSE_OF",
"source": "verheiratet_mit",
})
else:
unresolved.append({
"rowId": row_id,
"field": "verheiratet_mit",
"raw": raw,
"reason": reason,
})
return relationships, unresolved

View File

@@ -273,3 +273,48 @@ def test_deduplicate_both_none_birth_year_kept():
result, skipped = persons_tree._deduplicate(persons) result, skipped = persons_tree._deduplicate(persons)
assert [p["rowId"] for p in result] == ["row_A"] assert [p["rowId"] for p in result] == ["row_A"]
assert len(skipped) == 1 assert len(skipped) == 1
def _make_persons(*args):
"""Helper: args are (rowId, firstName, lastName, maidenName, spouse_raw) tuples."""
return [
{"rowId": a[0], "firstName": a[1], "lastName": a[2], "maidenName": a[3],
"_spouse_raw": a[4], "_bemerkung_raw": None,
"birthYear": None, "deathYear": None, "birthPlace": None, "deathPlace": None,
"generation": None, "familyMember": True, "alias": None, "notes": None}
for a in args
]
def test_resolve_spouses_success():
persons = _make_persons(
("row_002", "Elsgard", "Allemeyer", "Wöhler", "Allemeyer Werner"),
("row_003", "Werner", "Allemeyer", None, "Elsgard Wöhler"),
)
idx = persons_tree._build_index(persons)
rels, unres = persons_tree._resolve_spouses(persons, idx)
assert len(rels) == 1
assert rels[0]["type"] == "SPOUSE_OF"
assert set([rels[0]["personId"], rels[0]["relatedPersonId"]]) == {"row_002", "row_003"}
assert unres == []
def test_resolve_spouses_not_found():
persons = _make_persons(
("row_007", "Charlotte", "Blomquist", "Ruge", '"Tante Lolly"'),
)
idx = persons_tree._build_index(persons)
rels, unres = persons_tree._resolve_spouses(persons, idx)
assert rels == []
assert len(unres) == 1
assert unres[0]["rowId"] == "row_007"
assert unres[0]["reason"] == "not_found"
def test_resolve_spouses_empty_spouse_field():
persons = _make_persons(
("row_004", "Jürgen", "Allemeyer", None, None),
)
idx = persons_tree._build_index(persons)
rels, unres = persons_tree._resolve_spouses(persons, idx)
assert rels == [] and unres == []