feat(normalizer): classify_name + NameClass
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import config
|
||||
import persons
|
||||
from persons import NameClass
|
||||
|
||||
def test_slugify():
|
||||
assert persons.slugify("de Gruyter", "Eugenie") == "de-gruyter-eugenie"
|
||||
@@ -82,3 +83,39 @@ def test_alias_index_first_name_only_when_unambiguous():
|
||||
assert idx.resolve("Clara") == people[0].person_id # unique first name resolves
|
||||
assert idx.resolve("Walter") is None # ambiguous first name does NOT resolve
|
||||
assert idx.display(people[0].person_id) == "Clara Cram"
|
||||
|
||||
|
||||
GIVEN = {"ella", "anita", "kurt", "georg", "clara", "eugenie"}
|
||||
|
||||
def test_classify_unknown():
|
||||
assert persons.classify_name("?", GIVEN) is NameClass.UNKNOWN
|
||||
assert persons.classify_name("A. Kredell?", GIVEN) is NameClass.UNKNOWN
|
||||
assert persons.classify_name("unbekannt", GIVEN) is NameClass.UNKNOWN
|
||||
|
||||
def test_classify_prose():
|
||||
assert persons.classify_name("Adressenliste v Clara Cram zur Kondolenz", GIVEN) is NameClass.PROSE
|
||||
assert persons.classify_name("Clara de Gruyter(*1871)", GIVEN) is NameClass.PROSE # digit
|
||||
assert persons.classify_name('"Cramiade" Gedicht', GIVEN) is NameClass.PROSE # quote
|
||||
|
||||
def test_classify_collective():
|
||||
assert persons.classify_name("Familie", GIVEN) is NameClass.COLLECTIVE
|
||||
assert persons.classify_name("Fam.Cram", GIVEN) is NameClass.COLLECTIVE
|
||||
assert persons.classify_name("Eltern Cram", GIVEN) is NameClass.COLLECTIVE
|
||||
assert persons.classify_name("seine Kinder", GIVEN) is NameClass.COLLECTIVE
|
||||
|
||||
def test_classify_relational():
|
||||
assert persons.classify_name("Cousine Emmy Haniel", GIVEN) is NameClass.RELATIONAL
|
||||
assert persons.classify_name("Schwester Hanni", GIVEN) is NameClass.RELATIONAL
|
||||
|
||||
def test_classify_single_token():
|
||||
assert persons.classify_name("Agnes", GIVEN) is NameClass.SINGLE_TOKEN
|
||||
assert persons.classify_name("A.B.", GIVEN) is NameClass.SINGLE_TOKEN
|
||||
|
||||
def test_classify_ambiguous_pair():
|
||||
assert persons.classify_name("Ella Anita", GIVEN) is NameClass.AMBIGUOUS_PAIR
|
||||
assert persons.classify_name("Kurt Georg", GIVEN) is NameClass.AMBIGUOUS_PAIR
|
||||
|
||||
def test_classify_resolvable_single_person():
|
||||
# first + surname (surname not a given name) -> one real person, NOT ambiguous
|
||||
assert persons.classify_name("Mieze Schefold", GIVEN) is NameClass.RESOLVABLE
|
||||
assert persons.classify_name("Adolf Butenandt", GIVEN) is NameClass.RESOLVABLE
|
||||
|
||||
Reference in New Issue
Block a user