feat(normalizer): add row parser to persons_tree
This commit is contained in:
@@ -128,3 +128,48 @@ def _resolve_one(raw: str, index: dict[str, list[str]]) -> tuple[str | None, str
|
|||||||
if len(hits) == 0:
|
if len(hits) == 0:
|
||||||
return None, "not_found"
|
return None, "not_found"
|
||||||
return None, "ambiguous"
|
return None, "ambiguous"
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_row(row_num: int, fields: dict) -> dict:
    """Build a single person record from one header-mapped row dict.

    Each value is looked up in ``fields`` by column key; a missing or falsy
    entry is treated as the empty string and surrounding whitespace is
    removed.

    A birth or death date that is present but for which ``_parse_year``
    returns ``None`` is preserved verbatim in ``notes`` (tagged
    ``[Geburtsdatum: ...]`` / ``[Todesdatum: ...]``), ahead of the row's own
    notes text.

    Keys starting with '_' are internal and are stripped before JSON output
    in main().
    """

    def cell(column: str) -> str:
        # Missing keys and falsy values both collapse to "".
        value = fields.get(column) or ""
        return value.strip()

    raw_birth = cell("birth_date")
    raw_death = cell("death_date")

    year_of_birth = _parse_year(raw_birth)
    year_of_death = _parse_year(raw_death)

    # Dates that could not be reduced to a year are kept as tagged notes so
    # the original text is not lost.
    note_chunks = []
    if year_of_birth is None and raw_birth:
        note_chunks.append(f"[Geburtsdatum: {raw_birth}]")
    if year_of_death is None and raw_death:
        note_chunks.append(f"[Todesdatum: {raw_death}]")

    free_text = cell("notes")
    if free_text:
        note_chunks.append(free_text)

    maiden_name = cell("maiden_name") or None
    spouse_raw = cell("spouse") or None
    free_text_raw = free_text or None

    # Insertion order below is the key order of the resulting record.
    record = {"rowId": f"row_{row_num:03d}"}
    record["firstName"] = cell("first_name")
    record["lastName"] = cell("last_name")
    record["maidenName"] = maiden_name
    record["alias"] = None
    record["notes"] = " ".join(note_chunks) or None
    record["birthYear"] = year_of_birth
    record["deathYear"] = year_of_death
    record["birthPlace"] = cell("birth_place") or None
    record["deathPlace"] = cell("death_place") or None
    record["generation"] = _parse_generation(cell("generation"))
    record["familyMember"] = True
    record["_spouse_raw"] = spouse_raw
    record["_bemerkung_raw"] = free_text_raw
    return record
|
||||||
|
|||||||
@@ -171,3 +171,61 @@ def test_resolve_one_ambiguous():
|
|||||||
row_id, reason = persons_tree._resolve_one("Cram", idx)
|
row_id, reason = persons_tree._resolve_one("Cram", idx)
|
||||||
assert row_id is None
|
assert row_id is None
|
||||||
assert reason == "ambiguous"
|
assert reason == "ambiguous"
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_row_serial_dates():
    """A fully populated row with numeric serial dates maps to year fields."""
    row = {
        "generation": "G 3",
        "last_name": "Allemeyer",
        "first_name": "Elsgard",
        "maiden_name": "Wöhler",
        "birth_date": "7568",
        "birth_place": "Garz",
        "death_date": "36222",
        "death_place": "Espelkamp",
        "spouse": "Allemeyer Werner",
        "notes": "Nichte von Herbert",
    }

    person = persons_tree._parse_row(2, row)

    # Plain equality expectations, checked key by key.
    expected = {
        "rowId": "row_002",
        "firstName": "Elsgard",
        "lastName": "Allemeyer",
        "maidenName": "Wöhler",
        "birthYear": 1920,
        "deathYear": 1999,
        "birthPlace": "Garz",
        "deathPlace": "Espelkamp",
        "generation": 3,
        "_spouse_raw": "Allemeyer Werner",
        "_bemerkung_raw": "Nichte von Herbert",
    }
    for key, want in expected.items():
        assert person[key] == want

    assert person["familyMember"] is True
    # A resolved birth date must not be echoed into the notes.
    assert "[Geburtsdatum" not in (person["notes"] or "")
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_row_string_birth_date():
    """A textual day.month.year birth date yields its year; no notes result."""
    row = {
        "generation": "G 2",
        "last_name": "Cram",
        "first_name": "Herbert",
        "maiden_name": "",
        "birth_date": "25.6.1890",
        "birth_place": "Texas",
        "death_date": "",
        "death_place": "",
        "spouse": "",
        "notes": "",
    }

    person = persons_tree._parse_row(28, row)

    assert person["birthYear"] == 1890
    assert person["deathYear"] is None

    notes = person["notes"]
    assert notes is None or notes == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_row_unresolvable_date_goes_to_notes():
    """A birth date without a year is kept in notes next to the row's notes."""
    row = {
        "generation": "G 3",
        "last_name": "Heydrich",
        "first_name": "Dieter",
        "maiden_name": "",
        "birth_date": "28.9.",
        "birth_place": "",
        "death_date": "",
        "death_place": "",
        "spouse": "",
        "notes": "Bruder v Ingrid",
    }

    person = persons_tree._parse_row(96, row)

    assert person["birthYear"] is None

    notes = person["notes"]
    assert "[Geburtsdatum: 28.9.]" in notes
    assert "Bruder v Ingrid" in notes
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_row_empty_spouse_and_notes():
    """Empty spouse and notes cells come back as None in the internal keys."""
    row = {
        "generation": "G 4",
        "last_name": "Allemeyer",
        "first_name": "Jürgen",
        "maiden_name": "",
        "birth_date": "",
        "birth_place": "",
        "death_date": "",
        "death_place": "",
        "spouse": "",
        "notes": "",
    }

    person = persons_tree._parse_row(4, row)

    for internal_key in ("_spouse_raw", "_bemerkung_raw"):
        assert person[internal_key] is None
|
||||||
|
|||||||
Reference in New Issue
Block a user