diff --git a/tools/import-normalizer/persons_tree.py b/tools/import-normalizer/persons_tree.py index 68e77ffb..e346d8ab 100644 --- a/tools/import-normalizer/persons_tree.py +++ b/tools/import-normalizer/persons_tree.py @@ -66,3 +66,11 @@ def _parse_year(raw: str | None) -> int | None: return d.year return None + + +def _parse_generation(raw: str | None) -> int | None: + """Extract the generation integer from column A values like 'G 3', 'G3', 'G 0'.""" + if not raw: + return None + m = re.search(r"\d+", str(raw)) + return int(m.group()) if m else None diff --git a/tools/import-normalizer/tests/test_persons_tree.py b/tools/import-normalizer/tests/test_persons_tree.py index 30d51b3b..bfb7d908 100644 --- a/tools/import-normalizer/tests/test_persons_tree.py +++ b/tools/import-normalizer/tests/test_persons_tree.py @@ -59,3 +59,27 @@ def test_parse_year_bare_out_of_range_year_is_none(): # parse_date("1023") parses it as year 1023 (out of 1700-2100 guard). # The serial branch must NOT re-interpret it as a serial. assert persons_tree._parse_year("1023") is None + + +def test_parse_generation_space(): + assert persons_tree._parse_generation("G 3") == 3 + + +def test_parse_generation_no_space(): + assert persons_tree._parse_generation("G3") == 3 + + +def test_parse_generation_extra_spaces(): + assert persons_tree._parse_generation("G 0") == 0 + + +def test_parse_generation_trailing_garbage(): + assert persons_tree._parse_generation("G 2 de Gruyter") == 2 + + +def test_parse_generation_empty(): + assert persons_tree._parse_generation("") is None + + +def test_parse_generation_none(): + assert persons_tree._parse_generation(None) is None