feat(normalizer): add generation parser to persons_tree
This commit is contained in:
@@ -66,3 +66,11 @@ def _parse_year(raw: str | None) -> int | None:
|
||||
return d.year
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _parse_generation(raw: str | None) -> int | None:
|
||||
"""Extract the generation integer from column A values like 'G 3', 'G3', 'G 0'."""
|
||||
if not raw:
|
||||
return None
|
||||
m = re.search(r"\d+", str(raw))
|
||||
return int(m.group()) if m else None
|
||||
|
||||
@@ -59,3 +59,27 @@ def test_parse_year_bare_out_of_range_year_is_none():
|
||||
# parse_date("1023") parses it as year 1023 (out of 1700-2100 guard).
|
||||
# The serial branch must NOT re-interpret it as a serial.
|
||||
assert persons_tree._parse_year("1023") is None
|
||||
|
||||
|
||||
def test_parse_generation_space():
    """A single space between the 'G' prefix and the digit is accepted."""
    parsed = persons_tree._parse_generation("G 3")
    assert parsed == 3
|
||||
|
||||
|
||||
def test_parse_generation_no_space():
    """The digit may follow the 'G' prefix directly, with no separator."""
    parsed = persons_tree._parse_generation("G3")
    assert parsed == 3
|
||||
|
||||
|
||||
def test_parse_generation_extra_spaces():
    """Several spaces between the 'G' prefix and the digit are tolerated.

    Generation 0 is used on purpose so the test also pins that a zero
    result is returned as ``0`` rather than ``None``.
    """
    # Use a run of spaces so this case differs from the single-space test.
    assert persons_tree._parse_generation("G   0") == 0
|
||||
|
||||
|
||||
def test_parse_generation_trailing_garbage():
    """Text following the generation number does not affect the result."""
    parsed = persons_tree._parse_generation("G 2 de Gruyter")
    assert parsed == 2
|
||||
|
||||
|
||||
def test_parse_generation_empty():
    """An empty string yields no generation."""
    parsed = persons_tree._parse_generation("")
    assert parsed is None
|
||||
|
||||
|
||||
def test_parse_generation_none():
    """A missing value (None) yields no generation."""
    parsed = persons_tree._parse_generation(None)
    assert parsed is None
|
||||
|
||||
Reference in New Issue
Block a user