feat(normalizer): unresolved-names report + fix ambiguous-pair over-flagging

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-25 15:54:37 +02:00
parent f10b80a03f
commit 97ab9e38df
4 changed files with 44 additions and 18 deletions

View File

@@ -264,12 +264,14 @@ class AliasIndex:
class ResolutionContext:
"""Resolves raw name strings to person ids; accumulates provisional persons and review data."""
def __init__(self, alias_index: AliasIndex, name_overrides: dict[str, str]):
def __init__(self, alias_index: AliasIndex, name_overrides: dict[str, str],
given_names: set[str] | None = None):
self.index = alias_index
self.name_overrides = name_overrides
self.given_names = given_names or set()
self.provisional: dict[str, Person] = {}
self.unmatched: dict[str, list] = {}
self.ambiguous: list[tuple] = []
self.unresolved: list[tuple] = [] # (raw_name, category, source_row) for non-RESOLVABLE names
self._raw_to_pid: dict[str, str] = {}
self.override_hits = 0
@@ -296,6 +298,9 @@ class ResolutionContext:
return pid, self.index.display(pid) or name, True
# provisional person (unmatched) — never reuse a register id
self.unmatched.setdefault(name, []).append(source_row)
category = classify_name(name, self.given_names)
if category is not NameClass.RESOLVABLE:
self.unresolved.append((name, str(category), source_row))
if name in self._raw_to_pid:
return self._raw_to_pid[name], name, False
last, first = _last_first(name)
@@ -315,13 +320,7 @@ class ResolutionContext:
return pid, name, matched, len(parts) > 1
def resolve_receivers(self, raw: str, source_row: int):
results = []
for part in split_receivers(raw):
pid, name, matched = self.resolve_one(part, source_row)
if not matched and " " in part and find_known_last_name(part) is None and len(part.split()) == 2:
self.ambiguous.append((raw, part, source_row))
results.append((pid, name, matched))
return results
return [self.resolve_one(part, source_row) for part in split_receivers(raw)]
def _last_first(name: str):