test(normalizer): real provisional-vs-register collision + override-hits coverage

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-25 14:25:49 +02:00
parent 88c8063227
commit 366b484815
2 changed files with 60 additions and 5 deletions

View File

@@ -75,13 +75,23 @@ def test_to_canonical_splits_multi_sender():
def test_provisional_id_never_collides_with_register():
    """A provisional id must be suffixed instead of reusing a register person_id.

    The register contains one person whose id is "xyz-abc". The input string
    "Abc, Xyz" is not resolved by the alias index, yet the provisional slug
    derived from it is also "xyz-abc", so the resolver has to move the
    provisional id away from the register id.
    """
    # A provisional built from an unmatched string must not steal a register person_id.
    people = persons.parse_register([{"last_name": "Xyz", "first_name": "Abc"}])  # id "xyz-abc"
    ctx = persons.ResolutionContext(persons.AliasIndex(people), name_overrides={})
    # "Abc, Xyz" misses the alias index (the comma changes the normalized key) but its
    # provisional slug is "xyz-abc" — already the register person's id, so it MUST be suffixed.
    pid, _, matched = ctx.resolve_one("Abc, Xyz", source_row=1)
    assert matched is False
    # Guard the premise: the register id really is known, so a collision is possible.
    assert "xyz-abc" in ctx.index.known_ids
    assert pid == "xyz-abc-2"  # suffixed away from the register id, not reused
def test_resolve_one_override_increments_hits():
    """Resolving a name listed in ``name_overrides`` returns the mapped person and counts one hit."""
    register = persons.parse_register([{"last_name": "de Gruyter", "first_name": "Eugenie"}])
    alias_index = persons.AliasIndex(register)
    overrides = {"Genie": "de-gruyter-eugenie"}
    ctx = persons.ResolutionContext(alias_index, name_overrides=overrides)

    resolved_id, display_name, was_matched = ctx.resolve_one("Genie", source_row=1)

    # Checked separately so a failure pinpoints which expectation broke.
    assert resolved_id == "de-gruyter-eugenie"
    assert was_matched is True
    assert display_name == "Eugenie de Gruyter"  # display comes from the alias index
    assert ctx.override_hits == 1
def test_ambiguous_space_pair_flagged_not_split():
# US-PERS-02 AC4: "Ella Anita" is kept as one provisional + flagged, never guessed into two.