From 1908dde859861f9ad13884983737a89837cff5c3 Mon Sep 17 00:00:00 2001
From: Marcel
Date: Mon, 25 May 2026 13:27:26 +0200
Subject: [PATCH] feat(normalizer): year expansion century rule

Co-Authored-By: Claude Opus 4.7
---
 tools/import-normalizer/dates.py            | 32 ++++++++++++++++++++++++++++++++
 tools/import-normalizer/tests/test_dates.py | 17 +++++++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/tools/import-normalizer/dates.py b/tools/import-normalizer/dates.py
index 7b3fcc20..464092c1 100644
--- a/tools/import-normalizer/dates.py
+++ b/tools/import-normalizer/dates.py
@@ -39,6 +39,38 @@ def resolve_feast_or_season(token: str, year: int):
     return None


+def expand_year(token: str):
+    """Expand a 2/3/4-digit year string per the 1873–1957 century rule.
+
+    Surrounding whitespace is ignored. A 4-digit token is returned as-is and a
+    3-digit token DDD becomes 1DDD. A 2-digit token becomes 19xx when it is at
+    most config.TWO_DIGIT_19XX_MAX and 18xx when it is at least
+    config.TWO_DIGIT_18XX_MIN.
+
+    Returns:
+        The expanded year, or None when the token is not a plain decimal
+        number, has an unsupported length, or is a 2-digit value that falls
+        between the two thresholds (ambiguous century).
+    """
+    token = token.strip()
+    # isdecimal() rather than isdigit(): isdigit() also accepts characters
+    # such as superscript digits, which int() rejects with ValueError.
+    if not token.isdecimal():
+        return None
+    n, v = len(token), int(token)
+    if n == 4:
+        return v
+    if n == 3:
+        return 1000 + v
+    if n == 2:
+        if v <= config.TWO_DIGIT_19XX_MAX:
+            return 1900 + v
+        if v >= config.TWO_DIGIT_18XX_MIN:
+            return 1800 + v
+    # Ambiguous 2-digit value, or an unsupported number of digits.
+    return None
+
+
 def easter(year: int) -> datetime.date:
     """Easter Sunday (Gregorian) via the Anonymous Gregorian / Butcher algorithm."""
     a = year % 19
diff --git a/tools/import-normalizer/tests/test_dates.py b/tools/import-normalizer/tests/test_dates.py
index d834b02a..62fb79fa 100644
--- a/tools/import-normalizer/tests/test_dates.py
+++ b/tools/import-normalizer/tests/test_dates.py
@@ -24,3 +24,20 @@ def test_resolve_season():

 def test_resolve_unknown_token_returns_none():
     assert dates.resolve_feast_or_season("Freitag", 1919) is None
+
+
+def test_expand_year():
+    assert dates.expand_year("1888") == 1888
+    assert dates.expand_year("889") == 1889  # 3-digit -> 1DDD
+    assert dates.expand_year("923") == 1923
+    assert dates.expand_year("08") == 1908  # 00..57 -> 19xx
+    assert dates.expand_year("17") == 1917
+    assert dates.expand_year("57") == 1957
+    assert dates.expand_year("73") == 1873  # 73..99 -> 18xx
+    assert dates.expand_year("99") == 1899
+    assert dates.expand_year("65") is None  # 58..72 ambiguous
+    assert dates.expand_year("x") is None
+    assert dates.expand_year(" 17 ") == 1917  # surrounding whitespace ignored
+    assert dates.expand_year("") is None
+    assert dates.expand_year("12345") is None  # unsupported length
+    assert dates.expand_year("\u00b2\u00b3") is None  # isdigit() but not int()-parsable