"""Unit tests for the private helpers of ``persons_tree``.

Exercises ``_parse_year``, ``_parse_generation``, ``_norm_tree``,
``_build_index`` and ``_resolve_one``.
"""

import sys
from pathlib import Path

# Put the directory above this file's directory at the front of sys.path
# before importing persons_tree (presumably that is where the module lives).
sys.path.insert(0, str(Path(__file__).parent.parent))

import persons_tree


# ---------------------------------------------------------------------------
# Fixture helpers
# ---------------------------------------------------------------------------

def _person(row_id, first_name, last_name, maiden_name=None):
    """Return a person record with the four keys used by the index tests."""
    return {
        "rowId": row_id,
        "firstName": first_name,
        "lastName": last_name,
        "maidenName": maiden_name,
    }


def _cram_pair():
    """Return a fresh list of two records that share the last name "Cram"."""
    return [
        _person("row_028", "Herbert", "Cram"),
        _person("row_019", "Clara", "Cram"),
    ]


# ---------------------------------------------------------------------------
# _parse_year
# ---------------------------------------------------------------------------

def test_parse_year_iso_string():
    """An ISO date string yields its year."""
    year = persons_tree._parse_year("1920-09-20")
    assert year == 1920


def test_parse_year_excel_serial_birth():
    """A numeric string is read as an Excel serial date."""
    # Serial 7568 counted from 1899-12-30 lands on 1920-09-19 (or -20,
    # depending on how the leap day is counted); either way the year is 1920.
    year = persons_tree._parse_year("7568")
    assert year == 1920


def test_parse_year_excel_serial_death():
    """A larger Excel serial resolves to a late-20th-century year."""
    # Serial 36222 counted from 1899-12-30 falls in 1999.
    year = persons_tree._parse_year("36222")
    assert year == 1999


def test_parse_year_excel_serial_small():
    """A small Excel serial resolves to the year 1900."""
    # Serial 177 counted from 1899-12-30 is 1900-06-25.
    year = persons_tree._parse_year("177")
    assert year == 1900


def test_parse_year_german_date_string():
    """A day.month.year string yields its year."""
    year = persons_tree._parse_year("30.8.1862")
    assert year == 1862


def test_parse_year_year_only():
    """A bare four-digit year is returned as-is."""
    year = persons_tree._parse_year("1930")
    assert year == 1930


def test_parse_year_free_text():
    """A year embedded in free text is extracted."""
    year = persons_tree._parse_year("August 1941")
    assert year == 1941


def test_parse_year_none():
    """None in, None out."""
    year = persons_tree._parse_year(None)
    assert year is None


def test_parse_year_empty():
    """The empty string yields None."""
    year = persons_tree._parse_year("")
    assert year is None


def test_parse_year_unresolvable_truncated():
    """A truncated date without a four-digit year yields None."""
    # "2.9.196" contains no valid four-digit year.
    year = persons_tree._parse_year("2.9.196")
    assert year is None


def test_parse_year_typo_year():
    """A date whose year is outside the accepted range yields None."""
    # Year 1023 is outside the 1700-2100 guard.
    year = persons_tree._parse_year("4.3.1023")
    assert year is None


def test_parse_year_bare_out_of_range_year_is_none():
    """A bare out-of-range year must not be re-read as an Excel serial."""
    # "1023" is a plausible typo for "1923" but is NOT an Excel serial.
    # parse_date("1023") parses it as year 1023, which the 1700-2100 guard
    # rejects; the serial branch must not then re-interpret it as a serial.
    year = persons_tree._parse_year("1023")
    assert year is None


# ---------------------------------------------------------------------------
# _parse_generation
# ---------------------------------------------------------------------------

def test_parse_generation_space():
    """ "G <n>" with a separating space parses to n."""
    generation = persons_tree._parse_generation("G 3")
    assert generation == 3


def test_parse_generation_no_space():
    """ "G<n>" without a space parses to n."""
    generation = persons_tree._parse_generation("G3")
    assert generation == 3


def test_parse_generation_extra_spaces():
    """Generation zero parses to 0."""
    # NOTE(review): the literal holds a single space, the same shape as in
    # test_parse_generation_space; repeated spaces may have been lost
    # upstream -- confirm the intended input.
    generation = persons_tree._parse_generation("G 0")
    assert generation == 0


def test_parse_generation_trailing_garbage():
    """Text after the generation number is ignored."""
    generation = persons_tree._parse_generation("G 2 de Gruyter")
    assert generation == 2


def test_parse_generation_empty():
    """The empty string yields None."""
    generation = persons_tree._parse_generation("")
    assert generation is None


def test_parse_generation_none():
    """None in, None out."""
    generation = persons_tree._parse_generation(None)
    assert generation is None


# ---------------------------------------------------------------------------
# _norm_tree
# ---------------------------------------------------------------------------

def test_norm_tree_basic():
    """A plain name is lower-cased."""
    normalized = persons_tree._norm_tree("Werner Allemeyer")
    assert normalized == "werner allemeyer"


def test_norm_tree_diacritics():
    """An umlaut is transliterated ("ö" becomes "oe")."""
    normalized = persons_tree._norm_tree("Wöhler")
    assert normalized == "woehler"


def test_norm_tree_strips_parens():
    """A parenthesised part is removed."""
    normalized = persons_tree._norm_tree("Otto (Herbert)")
    assert normalized == "otto"


def test_norm_tree_strips_quotes():
    """Surrounding double quotes are removed."""
    normalized = persons_tree._norm_tree('"Tante Lolly"')
    assert normalized == "tante lolly"


def test_norm_tree_strips_geographic_suffix():
    """A trailing "Aachen" is dropped from the name."""
    normalized = persons_tree._norm_tree("Walter Cram Aachen")
    assert normalized == "walter cram"


def test_norm_tree_strips_mexiko():
    """A trailing "Mexiko" is dropped from the name."""
    normalized = persons_tree._norm_tree("Hans Cram Mexiko")
    assert normalized == "hans cram"


def test_norm_tree_collapses_whitespace():
    """Leading and trailing whitespace is stripped."""
    # NOTE(review): the literal only has single inner spaces, so this checks
    # stripping rather than collapsing; repeated spaces may have been lost
    # upstream -- confirm the intended input.
    normalized = persons_tree._norm_tree(" Clara de Gruyter ")
    assert normalized == "clara de gruyter"


# ---------------------------------------------------------------------------
# _build_index
# ---------------------------------------------------------------------------

def test_build_index_forward_lookup():
    """ "first last" is an index key mapping to the row id."""
    people = [_person("row_002", "Werner", "Allemeyer")]
    index = persons_tree._build_index(people)
    assert "werner allemeyer" in index
    assert index["werner allemeyer"] == ["row_002"]


def test_build_index_reversed_lookup():
    """ "last first" is indexed as well."""
    people = [_person("row_002", "Werner", "Allemeyer")]
    index = persons_tree._build_index(people)
    assert index.get("allemeyer werner") == ["row_002"]


def test_build_index_maiden_name_lookup():
    """ "first maiden-name" is indexed, in normalized form."""
    people = [_person("row_002", "Elsgard", "Allemeyer", "Wöhler")]
    index = persons_tree._build_index(people)
    assert index.get("elsgard woehler") == ["row_002"]


def test_build_index_single_token_fallback():
    """The last name alone is an index key."""
    people = [_person("row_028", "Herbert", "Cram")]
    index = persons_tree._build_index(people)
    assert index.get("cram") == ["row_028"]


def test_build_index_ambiguous_single_token():
    """A shared last name maps to every matching row id."""
    index = persons_tree._build_index(_cram_pair())
    assert set(index["cram"]) == {"row_028", "row_019"}


# ---------------------------------------------------------------------------
# _resolve_one
# ---------------------------------------------------------------------------

def test_resolve_one_found():
    """A unique match returns the row id and no failure reason."""
    people = [_person("row_003", "Werner", "Allemeyer")]
    index = persons_tree._build_index(people)
    resolved, why = persons_tree._resolve_one("Allemeyer Werner", index)
    assert resolved == "row_003"
    assert why is None


def test_resolve_one_not_found():
    """A name missing from the index reports "not_found"."""
    empty_index = {}
    resolved, why = persons_tree._resolve_one("Nobody Unknown", empty_index)
    assert resolved is None
    assert why == "not_found"


def test_resolve_one_ambiguous():
    """A key with several candidates reports "ambiguous"."""
    index = persons_tree._build_index(_cram_pair())
    resolved, why = persons_tree._resolve_one("Cram", index)
    assert resolved is None
    assert why == "ambiguous"