feat(normalizer): unresolved-names report + fix ambiguous-pair over-flagging
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -264,12 +264,14 @@ class AliasIndex:
|
||||
|
||||
class ResolutionContext:
|
||||
"""Resolves raw name strings to person ids; accumulates provisional persons and review data."""
|
||||
def __init__(self, alias_index: AliasIndex, name_overrides: dict[str, str],
             given_names: set[str] | None = None):
    """Create an empty resolution state around an alias index.

    Args:
        alias_index: Index of known persons; stored as ``self.index``.
        name_overrides: Raw-name to replacement mapping, stored as-is.
            NOTE(review): how overrides are applied is not visible in
            this block -- confirm against ``resolve_one``.
        given_names: Optional set of given names handed to
            ``classify_name`` when an unmatched name is categorised.
            ``None`` (or an empty set) yields a fresh empty set, which
            keeps the two-argument call form working.
    """
    self.index = alias_index
    self.name_overrides = name_overrides
    self.given_names = given_names or set()

    # Accumulators filled while names are being resolved.
    self.provisional: dict[str, Person] = {}
    self.unmatched: dict[str, list] = {}  # raw name -> source rows where it appeared
    self.ambiguous: list[tuple] = []
    self.unresolved: list[tuple] = []  # (raw_name, category, source_row) for non-RESOLVABLE names

    # Cache so a repeated unmatched raw name maps to the same provisional id.
    self._raw_to_pid: dict[str, str] = {}
    self.override_hits = 0
|
||||
|
||||
@@ -296,6 +298,9 @@ class ResolutionContext:
|
||||
return pid, self.index.display(pid) or name, True
|
||||
# provisional person (unmatched) — never reuse a register id
|
||||
self.unmatched.setdefault(name, []).append(source_row)
|
||||
category = classify_name(name, self.given_names)
|
||||
if category is not NameClass.RESOLVABLE:
|
||||
self.unresolved.append((name, str(category), source_row))
|
||||
if name in self._raw_to_pid:
|
||||
return self._raw_to_pid[name], name, False
|
||||
last, first = _last_first(name)
|
||||
@@ -315,13 +320,7 @@ class ResolutionContext:
|
||||
return pid, name, matched, len(parts) > 1
|
||||
|
||||
def resolve_receivers(self, raw: str, source_row: int) -> list[tuple]:
    """Resolve every receiver named in *raw*.

    The raw string is split with ``split_receivers`` and each part is
    passed to ``resolve_one`` together with *source_row*.

    This method does not add anything to ``self.ambiguous``: the earlier
    heuristic that flagged every unmatched two-word part over-reported
    ambiguous pairs and has been dropped.

    Args:
        raw: Raw receiver field, possibly naming several people.
        source_row: Row the field came from, forwarded to ``resolve_one``.

    Returns:
        One ``resolve_one`` result per part, in split order.
    """
    return [self.resolve_one(part, source_row) for part in split_receivers(raw)]
|
||||
|
||||
|
||||
def _last_first(name: str):
|
||||
|
||||
Reference in New Issue
Block a user