fix(normalizer): preserve trailing Bemerkung text after parent pattern

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-25 21:12:45 +02:00
parent ace41ad209
commit 34c40cb0ee
2 changed files with 20 additions and 2 deletions

View File

@@ -267,7 +267,13 @@ def _parse_bemerkung(
if not m:
continue
name_part = m.group(1).strip().rstrip("!., ")
# Split the captured group on the first comma — or, if that leaves no
# trailing text, on the first semicolon — to separate the name part from
# any trailing description (e.g. ", nach Mexiko emigriert")
raw_names, _, trailing = m.group(1).strip().partition(",")
if not trailing:
raw_names, _, trailing = raw_names.partition(";")
name_part = raw_names.strip().rstrip("!., ")
remainder = trailing.strip().lstrip(".,! ")
parts = [p.strip() for p in _AND_RE.split(name_part) if p.strip()]
rels: list[dict] = []
unres: list[dict] = []
@@ -300,7 +306,6 @@ def _parse_bemerkung(
"reason": reason,
})
remainder = s[m.end():].strip().lstrip(".,! ")
return rels, unres, remainder
# No pattern matched — full text goes to notes, nothing to unresolved