feat(normalizer): unresolved-names report + fix ambiguous-pair over-flagging
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -93,10 +93,17 @@ def test_resolve_one_override_increments_hits():
|
||||
assert name == "Eugenie de Gruyter" # display comes from the alias index
|
||||
assert ctx.override_hits == 1
|
||||
|
||||
def test_ambiguous_space_pair_flagged_not_split():
|
||||
# US-PERS-02 AC4: "Ella Anita" is kept as one provisional + flagged, never guessed into two.
|
||||
ctx = _ctx()
|
||||
def test_ambiguous_pair_recorded_in_unresolved():
|
||||
people = persons.parse_register([{"last_name": "de Gruyter", "first_name": "Walter"}])
|
||||
ctx = persons.ResolutionContext(persons.AliasIndex(people), name_overrides={},
|
||||
given_names={"ella", "anita"})
|
||||
raw = documents.RawRow(source_row=7, index="C-0200", sender="", receivers="Ella Anita")
|
||||
doc = documents.to_canonical(raw, ctx, date_overrides={})
|
||||
assert len(doc.receiver_person_ids) == 1 # not split
|
||||
assert any(part == "Ella Anita" for _, part, _ in ctx.ambiguous)
|
||||
assert len(doc.receiver_person_ids) == 1 # not split — one provisional
|
||||
assert any(name == "Ella Anita" and cat == "ambiguous_pair" for name, cat, _ in ctx.unresolved)
|
||||
|
||||
def test_resolvable_first_surname_pair_not_unresolved():
|
||||
ctx = persons.ResolutionContext(persons.AliasIndex([]), name_overrides={},
|
||||
given_names={"ella", "anita"})
|
||||
ctx.resolve_one("Mieze Schefold", source_row=1) # surname is not a given name
|
||||
assert ctx.unresolved == [] # RESOLVABLE -> not recorded
|
||||
|
||||
Reference in New Issue
Block a user