# File-viewer metadata: 174 lines, 5.2 KiB, Python
import sys
|
|
from pathlib import Path
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
import persons_tree
|
|
|
|
|
|
def test_parse_year_iso_string():
    """An ISO ``YYYY-MM-DD`` date string should yield its year as an int."""
    year = persons_tree._parse_year("1920-09-20")
    assert year == 1920
|
|
|
|
|
|
def test_parse_year_excel_serial_birth():
    """A numeric string holding an Excel date serial should resolve to its year."""
    # 7568 days after 1899-12-30 is 1920-09-19 (1920-09-20 if the epoch is
    # counted one day later); either way the year is 1920.
    serial = "7568"
    assert persons_tree._parse_year(serial) == 1920
|
|
|
|
|
|
def test_parse_year_excel_serial_death():
    """A five-digit Excel date serial should resolve to its calendar year."""
    # 36222 days after 1899-12-30 falls in 1999.
    serial = "36222"
    assert persons_tree._parse_year(serial) == 1999
|
|
|
|
|
|
def test_parse_year_excel_serial_small():
    """A small (three-digit) Excel date serial should still resolve to a year."""
    # 177 days after 1899-12-30 is 1900-06-25.
    serial = "177"
    assert persons_tree._parse_year(serial) == 1900
|
|
|
|
|
|
def test_parse_year_german_date_string():
    """A dotted ``D.M.YYYY`` date string should yield its trailing year."""
    year = persons_tree._parse_year("30.8.1862")
    assert year == 1862
|
|
|
|
|
|
def test_parse_year_year_only():
    """A bare four-digit year string should come back as that year."""
    year = persons_tree._parse_year("1930")
    assert year == 1930
|
|
|
|
|
|
def test_parse_year_free_text():
    """A month name followed by a year should yield the year."""
    year = persons_tree._parse_year("August 1941")
    assert year == 1941
|
|
|
|
|
|
def test_parse_year_none():
    """``None`` input should produce ``None`` rather than a year."""
    outcome = persons_tree._parse_year(None)
    assert outcome is None
|
|
|
|
|
|
def test_parse_year_empty():
    """An empty string should produce ``None`` rather than a year."""
    outcome = persons_tree._parse_year("")
    assert outcome is None
|
|
|
|
|
|
def test_parse_year_unresolvable_truncated():
    """A date whose year part is cut short should be reported as ``None``."""
    # "2.9.196" carries no valid 4-digit year, so nothing can be resolved.
    truncated = "2.9.196"
    assert persons_tree._parse_year(truncated) is None
|
|
|
|
|
|
def test_parse_year_typo_year():
    """A dotted date with an implausible year should be reported as ``None``."""
    # In "4.3.1023" the year 1023 lies outside the 1700-2100 guard.
    mistyped = "4.3.1023"
    assert persons_tree._parse_year(mistyped) is None
|
|
|
|
|
|
def test_parse_year_bare_out_of_range_year_is_none():
    """A bare out-of-range year must not be rescued by the Excel-serial path."""
    # "1023" is a plausible typo for "1923", but it is NOT an Excel serial.
    # parse_date("1023") reads it as year 1023, which the 1700-2100 guard
    # rejects; the serial branch must NOT then re-interpret it as a day count.
    bare_year = "1023"
    assert persons_tree._parse_year(bare_year) is None
|
|
|
|
|
|
def test_parse_generation_space():
    """A ``G`` prefix separated from the number by a space should parse."""
    generation = persons_tree._parse_generation("G 3")
    assert generation == 3
|
|
|
|
|
|
def test_parse_generation_no_space():
    """A ``G`` prefix directly followed by the number should parse."""
    generation = persons_tree._parse_generation("G3")
    assert generation == 3
|
|
|
|
|
|
def test_parse_generation_extra_spaces():
    """Generation zero should parse to ``0`` (an int, not ``None``)."""
    # NOTE(review): the test name mentions extra spaces, but the literal seen
    # here contains a single space -- confirm it was not collapsed in transit.
    generation = persons_tree._parse_generation("G 0")
    assert generation == 0
|
|
|
|
|
|
def test_parse_generation_trailing_garbage():
    """Text following the generation number should be ignored."""
    generation = persons_tree._parse_generation("G 2 de Gruyter")
    assert generation == 2
|
|
|
|
|
|
def test_parse_generation_empty():
    """An empty string should produce ``None`` rather than a generation."""
    outcome = persons_tree._parse_generation("")
    assert outcome is None
|
|
|
|
|
|
def test_parse_generation_none():
    """``None`` input should produce ``None`` rather than a generation."""
    outcome = persons_tree._parse_generation(None)
    assert outcome is None
|
|
|
|
|
|
def test_norm_tree_basic():
    """A plain two-part name should come back lower-cased."""
    normalized = persons_tree._norm_tree("Werner Allemeyer")
    assert normalized == "werner allemeyer"
|
|
|
|
|
|
def test_norm_tree_diacritics():
    """An umlaut should be transliterated (``ö`` becomes ``oe``)."""
    normalized = persons_tree._norm_tree("Wöhler")
    assert normalized == "woehler"
|
|
|
|
|
|
def test_norm_tree_strips_parens():
    """A parenthesised part of the name should be dropped entirely."""
    normalized = persons_tree._norm_tree("Otto (Herbert)")
    assert normalized == "otto"
|
|
|
|
|
|
def test_norm_tree_strips_quotes():
    """Double quotes wrapped around a name should be removed."""
    normalized = persons_tree._norm_tree('"Tante Lolly"')
    assert normalized == "tante lolly"
|
|
|
|
|
|
def test_norm_tree_strips_geographic_suffix():
    """A trailing place name (here "Aachen") should be removed from the name."""
    normalized = persons_tree._norm_tree("Walter Cram Aachen")
    assert normalized == "walter cram"
|
|
|
|
|
|
def test_norm_tree_strips_mexiko():
    """A trailing "Mexiko" should be removed from the name."""
    normalized = persons_tree._norm_tree("Hans Cram Mexiko")
    assert normalized == "hans cram"
|
|
|
|
|
|
def test_norm_tree_collapses_whitespace():
    """Leading and trailing blanks should not survive normalization."""
    # NOTE(review): the literal seen here has only single internal spaces;
    # the original may have used longer runs that were collapsed in transit
    # -- confirm against the repository copy.
    normalized = persons_tree._norm_tree(" Clara de Gruyter ")
    assert normalized == "clara de gruyter"
|
|
|
|
|
|
def test_build_index_forward_lookup():
    """The index should expose a person under "first last" in normalized form."""
    werner = {
        "rowId": "row_002",
        "firstName": "Werner",
        "lastName": "Allemeyer",
        "maidenName": None,
    }
    index = persons_tree._build_index([werner])
    forward_key = "werner allemeyer"
    # Check membership first so a missing key fails the assertion instead of
    # raising KeyError on the lookup below.
    assert forward_key in index
    assert index[forward_key] == ["row_002"]
|
|
|
|
|
|
def test_build_index_reversed_lookup():
    """The index should also expose a person under "last first"."""
    werner = {
        "rowId": "row_002",
        "firstName": "Werner",
        "lastName": "Allemeyer",
        "maidenName": None,
    }
    index = persons_tree._build_index([werner])
    hits = index.get("allemeyer werner")
    assert hits == ["row_002"]
|
|
|
|
|
|
def test_build_index_maiden_name_lookup():
    """A person should be reachable via first name plus normalized maiden name."""
    elsgard = {
        "rowId": "row_002",
        "firstName": "Elsgard",
        "lastName": "Allemeyer",
        "maidenName": "Wöhler",
    }
    index = persons_tree._build_index([elsgard])
    hits = index.get("elsgard woehler")
    assert hits == ["row_002"]
|
|
|
|
|
|
def test_build_index_single_token_fallback():
    """The last name on its own should work as a lookup key."""
    herbert = {
        "rowId": "row_028",
        "firstName": "Herbert",
        "lastName": "Cram",
        "maidenName": None,
    }
    index = persons_tree._build_index([herbert])
    hits = index.get("cram")
    assert hits == ["row_028"]
|
|
|
|
|
|
def test_build_index_ambiguous_single_token():
    """Two people sharing a last name should both be listed under that key."""
    crams = [
        {"rowId": row_id, "firstName": first, "lastName": "Cram", "maidenName": None}
        for row_id, first in (("row_028", "Herbert"), ("row_019", "Clara"))
    ]
    index = persons_tree._build_index(crams)
    # Compared as a set: the order of row ids under the shared key is not
    # part of what this test pins down.
    listed = set(index["cram"])
    assert listed == {"row_028", "row_019"}
|
|
|
|
|
|
def test_resolve_one_found():
    """A unique match should return the row id and no failure reason."""
    werner = {
        "rowId": "row_003",
        "firstName": "Werner",
        "lastName": "Allemeyer",
        "maidenName": None,
    }
    index = persons_tree._build_index([werner])
    # The query is given in "last first" order.
    matched_id, failure = persons_tree._resolve_one("Allemeyer Werner", index)
    assert matched_id == "row_003"
    assert failure is None
|
|
|
|
|
|
def test_resolve_one_not_found():
    """Against an empty index the result is no row id and reason "not_found"."""
    empty_index = {}
    matched_id, failure = persons_tree._resolve_one("Nobody Unknown", empty_index)
    assert matched_id is None
    assert failure == "not_found"
|
|
|
|
|
|
def test_resolve_one_ambiguous():
    """A key shared by two people yields no row id and reason "ambiguous"."""
    crams = [
        {"rowId": row_id, "firstName": first, "lastName": "Cram", "maidenName": None}
        for row_id, first in (("row_028", "Herbert"), ("row_019", "Clara"))
    ]
    index = persons_tree._build_index(crams)
    matched_id, failure = persons_tree._resolve_one("Cram", index)
    assert matched_id is None
    assert failure == "ambiguous"
|