"""Tests for the ``persons`` module: slugs, register parsing, receiver splitting, alias lookup."""

import config
import persons


def test_slugify():
    """``slugify(last, first)`` yields a lowercase, hyphenated slug; "ü" becomes "ue"."""
    expected_slugs = {
        ("de Gruyter", "Eugenie"): "de-gruyter-eugenie",
        ("Müller", "Karl Erhard"): "mueller-karl-erhard",
    }
    for (last_name, first_name), slug in expected_slugs.items():
        assert persons.slugify(last_name, first_name) == slug


def test_parse_register_basic():
    """Two register rows are parsed and the resulting person fields are checked."""
    charlotte_row = {
        "generation": "G 1",
        "last_name": "Blomquist",
        "first_name": "Charlotte,Meta,Jacobi",
        "maiden_name": "Ruge",
        "birth_date": "30.8.1862",
        "birth_place": "Schülperneusiel",
        "death_date": "1934-07-23",
        "death_place": "Göteborg",
        "spouse": '"Tante Lolly"',
        "notes": "Schwester v Marie Cram",
    }
    else_row = {
        "generation": "G 2",
        "last_name": "Bohrmann",
        "first_name": "Else",
        "maiden_name": "Cram",
        "birth_date": "28.11.1888",
        "spouse": "Ludwig Bohrmann",
        "notes": "Schwester v Herbert",
    }
    parsed = persons.parse_register([charlotte_row, else_row])

    charlotte = parsed[0]
    assert charlotte.person_id == "blomquist-charlotte"
    assert charlotte.first_name == "Charlotte"
    assert charlotte.maiden_name == "Ruge"
    # The day.month.year input is expected back in ISO form.
    assert charlotte.birth_date == "1862-08-30"
    # quoted spouse field is a nickname, not a spouse
    assert charlotte.nickname == "Tante Lolly"
    assert charlotte.spouse == ""
    # Given names after the first comma-separated one end up in extra_given_names.
    for given_name in ("Meta", "Jacobi"):
        assert given_name in charlotte.extra_given_names

    else_bohrmann = parsed[1]
    assert else_bohrmann.maiden_name == "Cram"
    assert else_bohrmann.spouse == "Ludwig Bohrmann"
    assert else_bohrmann.provisional is False


def test_parse_register_dedups_colliding_ids():
    """Identical first+last names must not share a person id."""
    # Two people with the same first+last name: BOTH get a numeric suffix (no ambiguous base id).
    duplicate_rows = [{"last_name": "Cram", "first_name": "Hans"} for _ in range(2)]
    ids = [person.person_id for person in persons.parse_register(duplicate_rows)]
    assert ids == ["cram-hans-1", "cram-hans-2"]
    assert len(set(ids)) == 2


def test_split_receivers():
    """``split_receivers`` turns a receiver cell into a list of individual names."""
    cases = [
        ("Eugenie Müller", ["Eugenie Müller"]),
        ("Walter und Eugenie de Gruyter", ["Walter de Gruyter", "Eugenie de Gruyter"]),
        ("Hedi und Tutu (Gruber)", ["Hedi Gruber", "Tutu Gruber"]),
        ("Clara u Familie", ["Clara"]),
        ("Eugenie de Gruyter geb. Müller", ["Eugenie de Gruyter"]),
        ("Herbert u Clara", ["Herbert", "Clara"]),
        ("", []),
        ("geb. Müller", []),  # maiden-only cell -> no person
        ("Herbert//Clara", ["Herbert", "Clara"]),  # // separator
    ]
    for cell, receivers in cases:
        assert persons.split_receivers(cell) == receivers


def test_find_known_last_name():
    """A known last name is returned from a full name; a bare first name gives ``None``."""
    found = persons.find_known_last_name("Eugenie de Gruyter")
    assert found == "de Gruyter"
    not_found = persons.find_known_last_name("Clara")
    assert not_found is None


def test_alias_index_resolves_maiden_and_married():
    """Married, maiden and lower-cased spellings all resolve to the same person id."""
    register = persons.parse_register(
        [
            {"last_name": "de Gruyter", "first_name": "Eugenie", "maiden_name": "Müller"},
            {"last_name": "Cram", "first_name": "Clara"},
        ]
    )
    index = persons.AliasIndex(register)
    eugenie_id = register[0].person_id
    aliases = (
        "Eugenie de Gruyter",  # canonical
        "Eugenie Müller",  # maiden alias
        "eugenie müller",  # normalized
    )
    for alias in aliases:
        assert index.resolve(alias) == eugenie_id
    assert index.resolve("Nobody Unknown") is None


def test_alias_index_suggestion():
    """A near-miss spelling is suggested with a score at or above the configured threshold."""
    register = persons.parse_register([{"last_name": "Wittkopf", "first_name": "Hans"}])
    index = persons.AliasIndex(register)
    suggested_id, score = index.suggest("Hans Wittkop")  # typo
    assert suggested_id == register[0].person_id
    assert score >= config.FUZZY_SUGGEST_THRESHOLD


def test_alias_index_first_name_only_when_unambiguous():
    """A bare first name resolves only if exactly one person in the register carries it."""
    register = persons.parse_register(
        [
            {"last_name": "Cram", "first_name": "Clara"},
            {"last_name": "de Gruyter", "first_name": "Walter"},
            {"last_name": "Cram", "first_name": "Walter"},  # 2nd "Walter" -> first name ambiguous
        ]
    )
    index = persons.AliasIndex(register)
    clara_id = register[0].person_id
    assert index.resolve("Clara") == clara_id  # unique first name resolves
    assert index.resolve("Walter") is None  # ambiguous first name does NOT resolve
    assert index.display(clara_id) == "Clara Cram"