fix(normalizer): split_receivers returns [] for a geb.-only cell
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -94,7 +94,7 @@ _PAREN_RE = re.compile(r"\(([^)]+)\)\s*$")
|
||||
_MULTI_RE = re.compile(r"\s+(?:und|u)\s+", re.I)
|
||||
|
||||
|
||||
def find_known_last_name(segment: str):
|
||||
def find_known_last_name(segment: str) -> str | None:
|
||||
seg = segment.strip()
|
||||
for ln in config.KNOWN_LAST_NAMES: # config lists longest-first
|
||||
if seg == ln or seg.endswith(" " + ln):
|
||||
@@ -112,6 +112,8 @@ def split_receivers(raw: str) -> list[str]:
|
||||
out.extend(split_receivers(seg))
|
||||
return out
|
||||
cleaned = _GEB_RE.sub("", raw).strip()
|
||||
if not cleaned: # e.g. a "geb. Müller"-only cell strips to empty
|
||||
return []
|
||||
if not _MULTI_RE.search(cleaned):
|
||||
return [cleaned]
|
||||
shared_last = None
|
||||
|
||||
@@ -46,6 +46,8 @@ def test_split_receivers():
|
||||
assert persons.split_receivers("Eugenie de Gruyter geb. Müller") == ["Eugenie de Gruyter"]
|
||||
assert persons.split_receivers("Herbert u Clara") == ["Herbert", "Clara"]
|
||||
assert persons.split_receivers("") == []
|
||||
assert persons.split_receivers("geb. Müller") == [] # maiden-only cell -> no person
|
||||
assert persons.split_receivers("Herbert//Clara") == ["Herbert", "Clara"] # // separator
|
||||
|
||||
def test_find_known_last_name():
|
||||
assert persons.find_known_last_name("Eugenie de Gruyter") == "de Gruyter"
|
||||
|
||||
Reference in New Issue
Block a user