133 lines
6.4 KiB
Python
133 lines
6.4 KiB
Python
import config
|
|
import persons
|
|
from persons import NameClass
|
|
|
|
def test_slugify():
    """Check ``persons.slugify`` against the expected slug for two name pairs."""
    # (first argument, second argument, expected slug)
    cases = [
        ("de Gruyter", "Eugenie", "de-gruyter-eugenie"),
        ("Müller", "Karl Erhard", "mueller-karl-erhard"),
    ]
    for first_arg, second_arg, expected_slug in cases:
        assert persons.slugify(first_arg, second_arg) == expected_slug
|
|
|
|
def test_parse_register_basic():
    """Parse two register rows and check the fields of the resulting people."""
    blomquist_row = {
        "generation": "G 1",
        "last_name": "Blomquist",
        "first_name": "Charlotte,Meta,Jacobi",
        "maiden_name": "Ruge",
        "birth_date": "30.8.1862",
        "birth_place": "Schülperneusiel",
        "death_date": "1934-07-23",
        "death_place": "Göteborg",
        "spouse": '"Tante Lolly"',
        "notes": "Schwester v Marie Cram",
    }
    bohrmann_row = {
        "generation": "G 2",
        "last_name": "Bohrmann",
        "first_name": "Else",
        "maiden_name": "Cram",
        "birth_date": "28.11.1888",
        "spouse": "Ludwig Bohrmann",
        "notes": "Schwester v Herbert",
    }
    parsed = persons.parse_register([blomquist_row, bohrmann_row])

    charlotte = parsed[0]
    assert charlotte.person_id == "blomquist-charlotte"
    assert charlotte.first_name == "Charlotte"
    assert charlotte.maiden_name == "Ruge"
    assert charlotte.birth_date == "1862-08-30"
    # quoted spouse field is a nickname, not a spouse
    assert charlotte.nickname == "Tante Lolly"
    assert charlotte.spouse == ""
    assert "Meta" in charlotte.extra_given_names
    assert "Jacobi" in charlotte.extra_given_names

    else_bohrmann = parsed[1]
    assert else_bohrmann.maiden_name == "Cram"
    assert else_bohrmann.spouse == "Ludwig Bohrmann"
    assert else_bohrmann.provisional is False
|
|
|
|
def test_parse_register_dedups_colliding_ids():
    """Two identical name rows must yield two distinct, suffixed person ids."""
    # Two people with the same first+last name: BOTH get a numeric suffix (no ambiguous base id).
    # The comprehension builds two separate dict objects, one per row.
    duplicate_rows = [{"last_name": "Cram", "first_name": "Hans"} for _ in range(2)]
    parsed = persons.parse_register(duplicate_rows)
    person_ids = [entry.person_id for entry in parsed]
    assert person_ids == ["cram-hans-1", "cram-hans-2"]
    assert len(set(person_ids)) == 2
|
|
|
|
def test_split_receivers():
    """Check ``persons.split_receivers`` on a table of receiver cells."""
    # (input cell, expected list of receiver names)
    cases = [
        ("Eugenie Müller", ["Eugenie Müller"]),
        ("Walter und Eugenie de Gruyter", ["Walter de Gruyter", "Eugenie de Gruyter"]),
        ("Hedi und Tutu (Gruber)", ["Hedi Gruber", "Tutu Gruber"]),
        ("Clara u Familie", ["Clara"]),
        ("Eugenie de Gruyter geb. Müller", ["Eugenie de Gruyter"]),
        ("Herbert u Clara", ["Herbert", "Clara"]),
        ("", []),
        ("geb. Müller", []),  # maiden-only cell -> no person
        ("Herbert//Clara", ["Herbert", "Clara"]),  # // separator
    ]
    for cell, expected_receivers in cases:
        assert persons.split_receivers(cell) == expected_receivers
|
|
|
|
def test_find_known_last_name():
    """Expect a surname for "Eugenie de Gruyter" and ``None`` for a bare "Clara"."""
    surname_found = persons.find_known_last_name("Eugenie de Gruyter")
    assert surname_found == "de Gruyter"

    surname_missing = persons.find_known_last_name("Clara")
    assert surname_missing is None
|
|
|
|
def test_alias_index_resolves_maiden_and_married():
    """Three spellings of one person resolve to her id; an unknown name gives None."""
    register = persons.parse_register([
        {"last_name": "de Gruyter", "first_name": "Eugenie", "maiden_name": "Müller"},
        {"last_name": "Cram", "first_name": "Clara"},
    ])
    index = persons.AliasIndex(register)
    eugenie_id = register[0].person_id

    spellings = (
        "Eugenie de Gruyter",  # canonical
        "Eugenie Müller",  # maiden alias
        "eugenie müller",  # normalized
    )
    for spelling in spellings:
        assert index.resolve(spelling) == eugenie_id

    assert index.resolve("Nobody Unknown") is None
|
|
|
|
def test_alias_index_suggestion():
    """A misspelled name is suggested with a score at or above the config threshold."""
    register = persons.parse_register([{"last_name": "Wittkopf", "first_name": "Hans"}])
    index = persons.AliasIndex(register)

    suggested_id, score = index.suggest("Hans Wittkop")  # typo

    assert suggested_id == register[0].person_id
    assert score >= config.FUZZY_SUGGEST_THRESHOLD
|
|
|
|
def test_alias_index_first_name_only_when_unambiguous():
    """A bare first name resolves only when exactly one person carries it."""
    register_rows = [
        {"last_name": "Cram", "first_name": "Clara"},
        {"last_name": "de Gruyter", "first_name": "Walter"},
        {"last_name": "Cram", "first_name": "Walter"},  # 2nd "Walter" -> first name ambiguous
    ]
    register = persons.parse_register(register_rows)
    index = persons.AliasIndex(register)

    # unique first name resolves
    assert index.resolve("Clara") == register[0].person_id
    # ambiguous first name does NOT resolve
    assert index.resolve("Walter") is None
    assert index.display(register[0].person_id) == "Clara Cram"
|
|
|
|
|
|
# Lowercase names passed as the second argument to persons.classify_name
# by the classification tests in this module.
GIVEN = {
    "anita",
    "clara",
    "ella",
    "eugenie",
    "georg",
    "kurt",
}
|
|
|
|
def test_classify_unknown():
    """Each of these cells must classify as ``NameClass.UNKNOWN``."""
    for cell in ("?", "A. Kredell?", "unbekannt"):
        assert persons.classify_name(cell, GIVEN) is NameClass.UNKNOWN
|
|
|
|
def test_classify_prose():
    """Each of these cells must classify as ``NameClass.PROSE``."""
    prose_cells = (
        "Adressenliste v Clara Cram zur Kondolenz",
        "Clara de Gruyter(*1871)",  # digit
        '"Cramiade" Gedicht',  # quote
    )
    for cell in prose_cells:
        assert persons.classify_name(cell, GIVEN) is NameClass.PROSE
|
|
|
|
def test_classify_collective():
    """Each of these cells must classify as ``NameClass.COLLECTIVE``."""
    collective_cells = ("Familie", "Fam.Cram", "Eltern Cram", "seine Kinder")
    for cell in collective_cells:
        assert persons.classify_name(cell, GIVEN) is NameClass.COLLECTIVE
|
|
|
|
def test_classify_relational():
    """Each of these cells must classify as ``NameClass.RELATIONAL``."""
    for cell in ("Cousine Emmy Haniel", "Schwester Hanni"):
        assert persons.classify_name(cell, GIVEN) is NameClass.RELATIONAL
|
|
|
|
def test_classify_single_token():
    """Each of these cells must classify as ``NameClass.SINGLE_TOKEN``."""
    for cell in ("Agnes", "A.B."):
        assert persons.classify_name(cell, GIVEN) is NameClass.SINGLE_TOKEN
|
|
|
|
def test_classify_ambiguous_pair():
    """Two tokens that are both in GIVEN must classify as ``NameClass.AMBIGUOUS_PAIR``."""
    for cell in ("Ella Anita", "Kurt Georg"):
        assert persons.classify_name(cell, GIVEN) is NameClass.AMBIGUOUS_PAIR
|
|
|
|
def test_classify_resolvable_single_person():
    """Each of these cells must classify as ``NameClass.RESOLVABLE``."""
    # first + surname (surname not a given name) -> one real person, NOT ambiguous
    for cell in ("Mieze Schefold", "Adolf Butenandt"):
        assert persons.classify_name(cell, GIVEN) is NameClass.RESOLVABLE
|
|
|
|
def test_build_given_names():
    """Check which lowercase names ``persons.build_given_names`` returns."""
    register = persons.parse_register([
        {"last_name": "de Gruyter", "first_name": "Eugenie"},
        {"last_name": "Cram", "first_name": "Charlotte,Meta"},  # comma -> primary + extra given
    ])
    given = persons.build_given_names(register, {"Anita"})

    expected_members = (
        "eugenie",
        "charlotte",  # primary given name
        "meta",  # extra given name
        "anita",  # from the extra set, normalized
    )
    for name in expected_members:
        assert name in given

    assert "schefold" not in given
|