def _parse_row(row_num: int, fields: dict) -> dict:
    """Build a single person record from one header-mapped row dict.

    Every value is read from ``fields`` by column key and whitespace-trimmed;
    missing or empty values become ``""`` (or ``None`` for optional output
    fields). A birth or death date from which ``_parse_year`` extracts no
    year is kept verbatim as a bracketed prefix in ``notes`` so the raw text
    is not lost.

    Internal keys prefixed with '_' are stripped before JSON output in main().

    Args:
        row_num: Row number used to build the zero-padded ``rowId``.
        fields: Mapping of column key to raw cell text.

    Returns:
        The person record as a dict, including the internal ``_spouse_raw``
        and ``_bemerkung_raw`` keys.
    """
    def cell(column: str) -> str:
        # Treat a missing key, None and "" alike: all yield an empty string.
        value = fields.get(column)
        return value.strip() if value else ""

    birth_text = cell("birth_date")
    death_text = cell("death_date")
    remark = cell("notes")

    born = _parse_year(birth_text)
    died = _parse_year(death_text)

    # (include?, text) pairs in output order: unparsed dates first, then the
    # free-text remark from the row.
    note_candidates = (
        (bool(birth_text) and born is None, f"[Geburtsdatum: {birth_text}]"),
        (bool(death_text) and died is None, f"[Todesdatum: {death_text}]"),
        (bool(remark), remark),
    )
    notes = " ".join(text for include, text in note_candidates if include)

    record = {
        "rowId": f"row_{row_num:03d}",
        "firstName": cell("first_name"),
        "lastName": cell("last_name"),
        "maidenName": cell("maiden_name") or None,
        "alias": None,
        "notes": notes or None,
        "birthYear": born,
        "deathYear": died,
        "birthPlace": cell("birth_place") or None,
        "deathPlace": cell("death_place") or None,
        "generation": _parse_generation(cell("generation")),
        "familyMember": True,
        "_spouse_raw": cell("spouse") or None,
        "_bemerkung_raw": remark or None,
    }
    return record
def test_parse_row_serial_dates():
    """Numeric serial date strings resolve to years; no date spills into notes."""
    row = {
        "generation": "G 3",
        "last_name": "Allemeyer",
        "first_name": "Elsgard",
        "maiden_name": "Wöhler",
        "birth_date": "7568",
        "birth_place": "Garz",
        "death_date": "36222",
        "death_place": "Espelkamp",
        "spouse": "Allemeyer Werner",
        "notes": "Nichte von Herbert",
    }
    person = persons_tree._parse_row(2, row)

    expected = {
        "rowId": "row_002",
        "firstName": "Elsgard",
        "lastName": "Allemeyer",
        "maidenName": "Wöhler",
        "birthYear": 1920,
        "deathYear": 1999,
        "birthPlace": "Garz",
        "deathPlace": "Espelkamp",
        "generation": 3,
        "_spouse_raw": "Allemeyer Werner",
        "_bemerkung_raw": "Nichte von Herbert",
    }
    assert {key: person[key] for key in expected} == expected
    assert person["familyMember"] is True
    assert "[Geburtsdatum" not in (person["notes"] or "")


def test_parse_row_string_birth_date():
    """A textual day.month.year birth date yields its year and no notes."""
    row = {
        "generation": "G 2",
        "last_name": "Cram",
        "first_name": "Herbert",
        "maiden_name": "",
        "birth_date": "25.6.1890",
        "birth_place": "Texas",
        "death_date": "",
        "death_place": "",
        "spouse": "",
        "notes": "",
    }
    person = persons_tree._parse_row(28, row)

    assert person["birthYear"] == 1890
    assert person["deathYear"] is None
    assert person["notes"] in (None, "")


def test_parse_row_unresolvable_date_goes_to_notes():
    """A date without a year is preserved verbatim alongside the row's notes."""
    row = {
        "generation": "G 3",
        "last_name": "Heydrich",
        "first_name": "Dieter",
        "maiden_name": "",
        "birth_date": "28.9.",
        "birth_place": "",
        "death_date": "",
        "death_place": "",
        "spouse": "",
        "notes": "Bruder v Ingrid",
    }
    person = persons_tree._parse_row(96, row)

    assert person["birthYear"] is None
    notes = person["notes"]
    assert "[Geburtsdatum: 28.9.]" in notes
    assert "Bruder v Ingrid" in notes


def test_parse_row_empty_spouse_and_notes():
    """Empty spouse and notes cells surface as None in the internal raw keys."""
    row = {
        "generation": "G 4",
        "last_name": "Allemeyer",
        "first_name": "Jürgen",
        "maiden_name": "",
        "birth_date": "",
        "birth_place": "",
        "death_date": "",
        "death_place": "",
        "spouse": "",
        "notes": "",
    }
    person = persons_tree._parse_row(4, row)

    assert person["_spouse_raw"] is None
    assert person["_bemerkung_raw"] is None