Import normalizer: offline tool to normalize the raw archive spreadsheets #663
@@ -193,6 +193,12 @@ def _attach_person_ids(tree_persons: list[dict], raw_dicts: list[dict]) -> None:
|
|||||||
parse_register and _parse_row both keep exactly the rows that have a last name.
|
parse_register and _parse_row both keep exactly the rows that have a last name.
|
||||||
"""
|
"""
|
||||||
register = _persons.parse_register(raw_dicts)
|
register = _persons.parse_register(raw_dicts)
|
||||||
|
if len(tree_persons) != len(register):
|
||||||
|
raise ValueError(
|
||||||
|
"person_id propagation requires equal length: "
|
||||||
|
f"{len(tree_persons)} tree persons vs {len(register)} register persons "
|
||||||
|
"(the positional zip would otherwise silently truncate and mis-join ids)"
|
||||||
|
)
|
||||||
for tree_person, register_person in zip(tree_persons, register):
|
for tree_person, register_person in zip(tree_persons, register):
|
||||||
tree_person["personId"] = register_person.person_id
|
tree_person["personId"] = register_person.person_id
|
||||||
|
|
||||||
|
|||||||
@@ -454,6 +454,26 @@ def test_attach_person_ids_propagates_register_slug():
|
|||||||
assert tree_persons[1]["personId"] == "de-gruyter-eugenie"
|
assert tree_persons[1]["personId"] == "de-gruyter-eugenie"
|
||||||
|
|
||||||
|
|
||||||
|
def test_attach_person_ids_raises_on_length_divergence():
    """_attach_person_ids must raise when tree and register lengths differ.

    Ids are propagated with a positional zip, so if tree_persons and the
    register drift in length (e.g. after a future filter change) zip would
    silently truncate and mis-join ids. The guard has to fail loudly instead.
    """
    import pytest

    def register_row(first_name, maiden_name):
        # Both rows share generation and last name; all other fields are blank.
        return {
            "generation": "G 1", "last_name": "de Gruyter", "first_name": first_name,
            "maiden_name": maiden_name, "birth_date": "", "birth_place": "",
            "death_date": "", "death_place": "", "spouse": "", "notes": "",
        }

    walter = register_row("Walter", "")
    eugenie = register_row("Eugenie", "Müller")

    # Both rows carry a last name, so parse_register keeps the two of them ...
    register_rows = [walter, eugenie]
    # ... while the tree side is built from the first row only -> 1 vs 2.
    lone_tree_person = persons_tree._parse_row(2, walter)

    with pytest.raises(ValueError, match="length"):
        persons_tree._attach_person_ids([lone_tree_person], register_rows)
|
||||||
|
|
||||||
|
|
||||||
def test_attach_person_ids_carries_register_collision_suffix():
|
def test_attach_person_ids_carries_register_collision_suffix():
|
||||||
# when two register rows slug-collide, the register suffixes the ids (-1, -2);
|
# when two register rows slug-collide, the register suffixes the ids (-1, -2);
|
||||||
# those exact suffixed ids must reach the tree persons, never a recomputed bare slug
|
# those exact suffixed ids must reach the tree persons, never a recomputed bare slug
|
||||||
|
|||||||
Reference in New Issue
Block a user