From 2d97595e9c898d1514ec63df43f8285b17732d01 Mon Sep 17 00:00:00 2001
From: Marcel
Date: Mon, 25 May 2026 14:02:35 +0200
Subject: [PATCH] fix(normalizer): split_receivers returns [] for a geb.-only cell

Co-Authored-By: Claude Opus 4.7
---
 tools/import-normalizer/persons.py | 4 +++-
 tools/import-normalizer/tests/test_persons.py | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/import-normalizer/persons.py b/tools/import-normalizer/persons.py
index 312df9d1..968cd7bb 100644
--- a/tools/import-normalizer/persons.py
+++ b/tools/import-normalizer/persons.py
@@ -94,7 +94,7 @@ _PAREN_RE = re.compile(r"\(([^)]+)\)\s*$")
 _MULTI_RE = re.compile(r"\s+(?:und|u)\s+", re.I)
 
 
-def find_known_last_name(segment: str):
+def find_known_last_name(segment: str) -> str | None:
     seg = segment.strip()
     for ln in config.KNOWN_LAST_NAMES: # config lists longest-first
         if seg == ln or seg.endswith(" " + ln):
@@ -112,6 +112,8 @@ def split_receivers(raw: str) -> list[str]:
             out.extend(split_receivers(seg))
         return out
     cleaned = _GEB_RE.sub("", raw).strip()
+    if not cleaned: # e.g. a "geb. Müller"-only cell strips to empty
+        return []
     if not _MULTI_RE.search(cleaned):
         return [cleaned]
     shared_last = None
diff --git a/tools/import-normalizer/tests/test_persons.py b/tools/import-normalizer/tests/test_persons.py
index ea0d2409..2137509f 100644
--- a/tools/import-normalizer/tests/test_persons.py
+++ b/tools/import-normalizer/tests/test_persons.py
@@ -46,6 +46,8 @@ def test_split_receivers():
     assert persons.split_receivers("Eugenie de Gruyter geb. Müller") == ["Eugenie de Gruyter"]
     assert persons.split_receivers("Herbert u Clara") == ["Herbert", "Clara"]
     assert persons.split_receivers("") == []
+    assert persons.split_receivers("geb. Müller") == [] # maiden-only cell -> no person
+    assert persons.split_receivers("Herbert//Clara") == ["Herbert", "Clara"] # // separator
 
 def test_find_known_last_name():
     assert persons.find_known_last_name("Eugenie de Gruyter") == "de Gruyter"