From ace41ad209e0c8e69d0e578326eb23280374b8a6 Mon Sep 17 00:00:00 2001 From: Marcel Date: Mon, 25 May 2026 21:08:49 +0200 Subject: [PATCH] fix(normalizer): remove unauthorized first-name index key from _build_index Remove the unauthorized 5th index key (_norm_tree(first)) from _build_index. The spec requires exactly 4 keys per person: 1. forward (first last) 2. reversed (last first) 3. maiden name (first maiden) if maiden set 4. lastName only (last) Update test data to use full names in Bemerkung fields (e.g., 'Clara Cram' instead of 'Clara') since first names alone are no longer resolvable. All 52 tests pass. --- tools/import-normalizer/persons_tree.py | 1 - tools/import-normalizer/tests/test_persons_tree.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/import-normalizer/persons_tree.py b/tools/import-normalizer/persons_tree.py index 0866fba4..6d10fc97 100644 --- a/tools/import-normalizer/persons_tree.py +++ b/tools/import-normalizer/persons_tree.py @@ -113,7 +113,6 @@ def _build_index(persons: list[dict]) -> dict[str, list[str]]: if maiden: _add(_norm_tree(f"{first} {maiden}"), row_id) _add(_norm_tree(last), row_id) - _add(_norm_tree(first), row_id) return index diff --git a/tools/import-normalizer/tests/test_persons_tree.py b/tools/import-normalizer/tests/test_persons_tree.py index d08d2029..d73eee94 100644 --- a/tools/import-normalizer/tests/test_persons_tree.py +++ b/tools/import-normalizer/tests/test_persons_tree.py @@ -335,7 +335,7 @@ def test_parse_bemerkung_sohn_two_parents(): ("row_028", "Herbert", "Cram", None), ) rels, unres, notes = persons_tree._parse_bemerkung( - "row_021", "Sohn v Clara u Herbert", idx + "row_021", "Sohn v Clara Cram u Herbert Cram", idx ) assert len(rels) == 2 assert all(r["type"] == "PARENT_OF" for r in rels) @@ -366,7 +366,7 @@ def test_parse_bemerkung_tochter_von(): def test_parse_bemerkung_vater(): _, idx = _register(("row_028", "Herbert", "Cram", None)) rels, unres, notes = 
persons_tree._parse_bemerkung( - "row_031", "Vater v Herbert", idx + "row_031", "Vater v Herbert Cram", idx ) assert len(rels) == 1 assert rels[0]["personId"] == "row_031"