test(normalizer): real provisional-vs-register collision + override-hits coverage
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -75,13 +75,23 @@ def test_to_canonical_splits_multi_sender():
|
||||
|
||||
def test_provisional_id_never_collides_with_register():
    """A provisional built from an unmatched string must not steal a register person_id.

    The register holds a single person whose id is "xyz-abc". Resolving a string
    that is not matched but slugs to that same id must yield a suffixed
    provisional id instead of reusing the register id.
    """
    people = persons.parse_register([{"last_name": "Xyz", "first_name": "Abc"}])  # id "xyz-abc"
    ctx = persons.ResolutionContext(persons.AliasIndex(people), name_overrides={})

    # "Abc, Xyz" misses the alias index (the comma changes the normalized key) but its
    # provisional slug is "xyz-abc" — already the register person's id, so it MUST be suffixed.
    pid, _, matched = ctx.resolve_one("Abc, Xyz", source_row=1)

    assert matched is False
    # Guard the premise: the collision is only real if the register id is actually known.
    assert "xyz-abc" in ctx.index.known_ids
    assert pid == "xyz-abc-2"  # suffixed away from the register id, not reused
|
||||
|
||||
def test_resolve_one_override_increments_hits():
    """Resolving a name listed in ``name_overrides`` matches and bumps ``override_hits``.

    The override maps the raw string "Genie" to the id "de-gruyter-eugenie";
    the test checks the returned id, display name, matched flag and hit counter.
    """
    register_rows = [{"last_name": "de Gruyter", "first_name": "Eugenie"}]
    alias_index = persons.AliasIndex(persons.parse_register(register_rows))
    ctx = persons.ResolutionContext(
        alias_index,
        name_overrides={"Genie": "de-gruyter-eugenie"},
    )

    resolved_id, display_name, was_matched = ctx.resolve_one("Genie", source_row=1)

    assert resolved_id == "de-gruyter-eugenie"
    assert was_matched is True
    assert display_name == "Eugenie de Gruyter"  # display comes from the alias index
    assert ctx.override_hits == 1
|
||||
|
||||
def test_ambiguous_space_pair_flagged_not_split():
|
||||
# US-PERS-02 AC4: "Ella Anita" is kept as one provisional + flagged, never guessed into two.
|
||||
|
||||
Reference in New Issue
Block a user