From 7edc002ebbd5e30a7d98aea70fd04056f6065134 Mon Sep 17 00:00:00 2001 From: Marcel Date: Mon, 25 May 2026 13:38:32 +0200 Subject: [PATCH] feat(normalizer): roman-numeral month matcher Co-Authored-By: Claude Opus 4.7 --- tools/import-normalizer/dates.py | 20 +++++++++++++++++++- tools/import-normalizer/tests/test_dates.py | 6 ++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/tools/import-normalizer/dates.py b/tools/import-normalizer/dates.py index 0dc9aff4..75605688 100644 --- a/tools/import-normalizer/dates.py +++ b/tools/import-normalizer/dates.py @@ -116,8 +116,26 @@ def _match_numeric(s): return None +_ROMAN_RE = re.compile(r"(\d{1,2})\.\s*([IVXLC]+)\.?\s*(\d{2,4})", re.I) + + +def _match_roman(s): + m = _ROMAN_RE.fullmatch(s) + if not m: + return None + day = int(m.group(1)) + month = config.ROMAN_MONTHS.get(m.group(2).lower()) + year = expand_year(m.group(3)) + if not month or year is None: + return None + try: + return datetime.date(year, month, day).isoformat(), Precision.DAY + except ValueError: + return None + + # Matchers are tried in order. Later tasks append to this list. -_MATCHERS = [_match_iso, _match_numeric] +_MATCHERS = [_match_iso, _match_numeric, _match_roman] def parse_date(raw: str, date_overrides: dict | None = None) -> ParsedDate: diff --git a/tools/import-normalizer/tests/test_dates.py b/tools/import-normalizer/tests/test_dates.py index 8f6af99f..c520ca36 100644 --- a/tools/import-normalizer/tests/test_dates.py +++ b/tools/import-normalizer/tests/test_dates.py @@ -63,3 +63,9 @@ def test_parse_leading_qualifier_is_approx(): r = dates.parse_date("nach 1.5.1900") # qualifier stripped, numeric date salvaged, precision APPROX assert r.iso == "1900-05-01" assert r.precision == Precision.APPROX + +def test_parse_roman_months(): + assert dates.parse_date("22.III.18").iso == "1918-03-22" + assert dates.parse_date("19.XII.1954").iso == "1954-12-19" + assert dates.parse_date("1.III.27").iso == "1927-03-01" + assert dates.parse_date("22.III.18").precision == Precision.DAY