fix(normalizer): preserve trailing Bemerkung text after parent pattern
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -267,7 +267,13 @@ def _parse_bemerkung(
|
|||||||
if not m:
|
if not m:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
name_part = m.group(1).strip().rstrip("!., ")
|
# Split the captured group on the first comma (or, if that leaves no tail, on the first semicolon) to separate
|
||||||
|
# the name part from any trailing description (e.g. ", nach Mexiko emigriert")
|
||||||
|
raw_names, _, trailing = m.group(1).strip().partition(",")
|
||||||
|
if not trailing:
|
||||||
|
raw_names, _, trailing = raw_names.partition(";")
|
||||||
|
name_part = raw_names.strip().rstrip("!., ")
|
||||||
|
remainder = trailing.strip().lstrip(".,! ")
|
||||||
parts = [p.strip() for p in _AND_RE.split(name_part) if p.strip()]
|
parts = [p.strip() for p in _AND_RE.split(name_part) if p.strip()]
|
||||||
rels: list[dict] = []
|
rels: list[dict] = []
|
||||||
unres: list[dict] = []
|
unres: list[dict] = []
|
||||||
@@ -300,7 +306,6 @@ def _parse_bemerkung(
|
|||||||
"reason": reason,
|
"reason": reason,
|
||||||
})
|
})
|
||||||
|
|
||||||
remainder = s[m.end():].strip().lstrip(".,! ")
|
|
||||||
return rels, unres, remainder
|
return rels, unres, remainder
|
||||||
|
|
||||||
# No pattern matched — full text goes to notes, nothing to unresolved
|
# No pattern matched — full text goes to notes, nothing to unresolved
|
||||||
|
|||||||
@@ -418,3 +418,16 @@ def test_parse_bemerkung_plain_remark():
|
|||||||
)
|
)
|
||||||
assert rels == [] and unres == []
|
assert rels == [] and unres == []
|
||||||
assert notes == "Verfasserin der Cram-Chronik !!"
|
assert notes == "Verfasserin der Cram-Chronik !!"
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_bemerkung_sohn_with_trailing_remark():
|
||||||
|
_, idx = _register(
|
||||||
|
("row_019", "Clara", "Cram", "de Gruyter"),
|
||||||
|
("row_028", "Herbert", "Cram", None),
|
||||||
|
)
|
||||||
|
rels, unres, notes = persons_tree._parse_bemerkung(
|
||||||
|
"row_021", "Sohn v Clara Cram u Herbert Cram, nach Mexiko emigriert", idx
|
||||||
|
)
|
||||||
|
assert len(rels) == 2
|
||||||
|
assert unres == []
|
||||||
|
assert notes == "nach Mexiko emigriert"
|
||||||
|
|||||||
Reference in New Issue
Block a user