fix(normalizer): preserve trailing Bemerkung text after parent pattern
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -267,7 +267,13 @@ def _parse_bemerkung(
|
||||
if not m:
|
||||
continue
|
||||
|
||||
name_part = m.group(1).strip().rstrip("!., ")
|
||||
# Split the captured group on the first comma (or, if that leaves no trailing text, on the first semicolon) to separate
|
||||
# the name part from any trailing description (e.g. ", nach Mexiko emigriert")
|
||||
raw_names, _, trailing = m.group(1).strip().partition(",")
|
||||
if not trailing:
|
||||
raw_names, _, trailing = raw_names.partition(";")
|
||||
name_part = raw_names.strip().rstrip("!., ")
|
||||
remainder = trailing.strip().lstrip(".,! ")
|
||||
parts = [p.strip() for p in _AND_RE.split(name_part) if p.strip()]
|
||||
rels: list[dict] = []
|
||||
unres: list[dict] = []
|
||||
@@ -300,7 +306,6 @@ def _parse_bemerkung(
|
||||
"reason": reason,
|
||||
})
|
||||
|
||||
remainder = s[m.end():].strip().lstrip(".,! ")
|
||||
return rels, unres, remainder
|
||||
|
||||
# No pattern matched — full text goes to notes, nothing to unresolved
|
||||
|
||||
Reference in New Issue
Block a user