import config
import persons
from persons import NameClass


def test_slugify():
    """Check the slug produced for a (last name, first name) pair."""
    expected_slugs = {
        ("de Gruyter", "Eugenie"): "de-gruyter-eugenie",
        ("Müller", "Karl Erhard"): "mueller-karl-erhard",
    }
    for (last_name, first_name), slug in expected_slugs.items():
        assert persons.slugify(last_name, first_name) == slug


def test_parse_register_basic():
    """Parse two register rows and check the fields of the resulting people."""
    blomquist_row = {
        "generation": "G 1",
        "last_name": "Blomquist",
        "first_name": "Charlotte,Meta,Jacobi",
        "maiden_name": "Ruge",
        "birth_date": "30.8.1862",
        "birth_place": "Schülperneusiel",
        "death_date": "1934-07-23",
        "death_place": "Göteborg",
        "spouse": '"Tante Lolly"',
        "notes": "Schwester v Marie Cram",
    }
    bohrmann_row = {
        "generation": "G 2",
        "last_name": "Bohrmann",
        "first_name": "Else",
        "maiden_name": "Cram",
        "birth_date": "28.11.1888",
        "spouse": "Ludwig Bohrmann",
        "notes": "Schwester v Herbert",
    }
    parsed = persons.parse_register([blomquist_row, bohrmann_row])

    charlotte = parsed[0]
    expected_charlotte = {
        "person_id": "blomquist-charlotte",
        "first_name": "Charlotte",
        "maiden_name": "Ruge",
        "birth_date": "1862-08-30",
        # A quoted spouse field is a nickname, not a spouse.
        "nickname": "Tante Lolly",
        "spouse": "",
    }
    for attribute, wanted in expected_charlotte.items():
        assert getattr(charlotte, attribute) == wanted
    # The given names after the first one end up in extra_given_names.
    assert "Meta" in charlotte.extra_given_names
    assert "Jacobi" in charlotte.extra_given_names

    else_bohrmann = parsed[1]
    assert else_bohrmann.maiden_name == "Cram"
    assert else_bohrmann.spouse == "Ludwig Bohrmann"
    assert else_bohrmann.provisional is False


def test_parse_register_dedups_colliding_ids():
    """Two people sharing first and last name BOTH get a numeric suffix."""
    # Neither duplicate keeps the bare base id, so no id stays ambiguous.
    duplicate = {"last_name": "Cram", "first_name": "Hans"}
    parsed = persons.parse_register([dict(duplicate), dict(duplicate)])
    ids = [person.person_id for person in parsed]
    assert ids == ["cram-hans-1", "cram-hans-2"]
    assert len(set(ids)) == 2


def test_split_receivers():
    """Check how a receiver cell is split into individual person names."""
    cases = [
        ("Eugenie Müller", ["Eugenie Müller"]),
        (
            "Walter und Eugenie de Gruyter",
            ["Walter de Gruyter", "Eugenie de Gruyter"],
        ),
        ("Hedi und Tutu (Gruber)", ["Hedi Gruber", "Tutu Gruber"]),
        ("Clara u Familie", ["Clara"]),
        ("Eugenie de Gruyter geb. Müller", ["Eugenie de Gruyter"]),
        ("Herbert u Clara", ["Herbert", "Clara"]),
        ("", []),
        ("geb. Müller", []),  # maiden-only cell -> no person
        ("Herbert//Clara", ["Herbert", "Clara"]),  # // separator
    ]
    for cell, receivers in cases:
        assert persons.split_receivers(cell) == receivers


def test_find_known_last_name():
    """A known surname is found in a full name; a bare first name gives None."""
    found = persons.find_known_last_name("Eugenie de Gruyter")
    assert found == "de Gruyter"
    missing = persons.find_known_last_name("Clara")
    assert missing is None


def test_alias_index_resolves_maiden_and_married():
    """Married, maiden and lower-cased spellings resolve to the same person."""
    register = persons.parse_register(
        [
            {
                "last_name": "de Gruyter",
                "first_name": "Eugenie",
                "maiden_name": "Müller",
            },
            {"last_name": "Cram", "first_name": "Clara"},
        ]
    )
    index = persons.AliasIndex(register)
    eugenie_id = register[0].person_id
    resolving_spellings = [
        "Eugenie de Gruyter",  # canonical
        "Eugenie Müller",  # maiden alias
        "eugenie müller",  # normalized
    ]
    for spelling in resolving_spellings:
        assert index.resolve(spelling) == eugenie_id
    assert index.resolve("Nobody Unknown") is None


def test_alias_index_suggestion():
    """A misspelt name is suggested with a score at or above the threshold."""
    register = persons.parse_register(
        [{"last_name": "Wittkopf", "first_name": "Hans"}]
    )
    index = persons.AliasIndex(register)
    suggested_id, score = index.suggest("Hans Wittkop")  # typo
    assert suggested_id == register[0].person_id
    assert score >= config.FUZZY_SUGGEST_THRESHOLD


def test_alias_index_first_name_only_when_unambiguous():
    """A bare first name resolves only when exactly one person carries it."""
    register = persons.parse_register(
        [
            {"last_name": "Cram", "first_name": "Clara"},
            {"last_name": "de Gruyter", "first_name": "Walter"},
            # 2nd "Walter" -> first name ambiguous
            {"last_name": "Cram", "first_name": "Walter"},
        ]
    )
    index = persons.AliasIndex(register)
    clara_id = register[0].person_id
    # Unique first name resolves.
    assert index.resolve("Clara") == clara_id
    # Ambiguous first name does NOT resolve.
    assert index.resolve("Walter") is None
    assert index.display(register[0].person_id) == "Clara Cram"


# Given names passed to classify_name in the classification tests below.
GIVEN = {"ella", "anita", "kurt", "georg", "clara", "eugenie"}


def test_classify_unknown():
    """These inputs are expected to classify as UNKNOWN."""
    for text in ("?", "A. Kredell?", "unbekannt"):
        assert persons.classify_name(text, GIVEN) is NameClass.UNKNOWN


def test_classify_prose():
    """These inputs are expected to classify as PROSE."""
    prose_inputs = (
        "Adressenliste v Clara Cram zur Kondolenz",
        "Clara de Gruyter(*1871)",  # digit
        '"Cramiade" Gedicht',  # quote
    )
    for text in prose_inputs:
        assert persons.classify_name(text, GIVEN) is NameClass.PROSE


def test_classify_collective():
    """These inputs are expected to classify as COLLECTIVE."""
    for text in ("Familie", "Fam.Cram", "Eltern Cram", "seine Kinder"):
        assert persons.classify_name(text, GIVEN) is NameClass.COLLECTIVE


def test_classify_relational():
    """These inputs are expected to classify as RELATIONAL."""
    for text in ("Cousine Emmy Haniel", "Schwester Hanni"):
        assert persons.classify_name(text, GIVEN) is NameClass.RELATIONAL


def test_classify_single_token():
    """These inputs are expected to classify as SINGLE_TOKEN."""
    for text in ("Agnes", "A.B."):
        assert persons.classify_name(text, GIVEN) is NameClass.SINGLE_TOKEN


def test_classify_ambiguous_pair():
    """Two words that are both in GIVEN classify as AMBIGUOUS_PAIR."""
    for text in ("Ella Anita", "Kurt Georg"):
        assert persons.classify_name(text, GIVEN) is NameClass.AMBIGUOUS_PAIR


def test_classify_resolvable_single_person():
    """First name plus surname classifies as RESOLVABLE."""
    # first + surname (surname not a given name) -> one real person,
    # NOT ambiguous
    for text in ("Mieze Schefold", "Adolf Butenandt"):
        assert persons.classify_name(text, GIVEN) is NameClass.RESOLVABLE