feat(normalizer): build_given_names from register + supplement
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -197,6 +197,22 @@ def classify_name(raw: str, given_names: set[str]) -> NameClass:
|
||||
# register; if they surface in review, lower-priority than the real prose entries.
|
||||
|
||||
|
||||
def build_given_names(register: list[Person], extra: set[str]) -> set[str]:
|
||||
"""Set of normalized given names from the register (first + extra given) plus a supplement.
|
||||
|
||||
Used by classify_name to tell a two-given-name pair (two people) from a first+surname.
|
||||
"""
|
||||
names: set[str] = set()
|
||||
for p in register:
|
||||
if p.first_name:
|
||||
names.add(_norm(p.first_name))
|
||||
for g in p.extra_given_names:
|
||||
names.add(_norm(g))
|
||||
for e in extra:
|
||||
names.add(_norm(e))
|
||||
return names
|
||||
|
||||
|
||||
class AliasIndex:
|
||||
def __init__(self, people: list[Person]):
|
||||
self._by_alias: dict[str, str] = {}
|
||||
|
||||
@@ -119,3 +119,14 @@ def test_classify_resolvable_single_person():
|
||||
# first + surname (surname not a given name) -> one real person, NOT ambiguous
|
||||
assert persons.classify_name("Mieze Schefold", GIVEN) is NameClass.RESOLVABLE
|
||||
assert persons.classify_name("Adolf Butenandt", GIVEN) is NameClass.RESOLVABLE
|
||||
|
||||
def test_build_given_names():
|
||||
people = persons.parse_register([
|
||||
{"last_name": "de Gruyter", "first_name": "Eugenie"},
|
||||
{"last_name": "Cram", "first_name": "Charlotte,Meta"}, # comma -> primary + extra given
|
||||
])
|
||||
g = persons.build_given_names(people, {"Anita"})
|
||||
assert "eugenie" in g
|
||||
assert "charlotte" in g and "meta" in g # primary + extra given names
|
||||
assert "anita" in g # from the extra set, normalized
|
||||
assert "schefold" not in g
|
||||
|
||||
Reference in New Issue
Block a user