From 59715bdccdfcb8930edfe5cd67b98809710f73b4 Mon Sep 17 00:00:00 2001
From: Marcel
Date: Mon, 25 May 2026 13:53:05 +0200
Subject: [PATCH] fix(normalizer): require day-dot in English month-first matcher (structural anti-shadow)

Co-Authored-By: Claude Opus 4.7
---
 tools/import-normalizer/dates.py            | 3 ++-
 tools/import-normalizer/tests/test_dates.py | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/import-normalizer/dates.py b/tools/import-normalizer/dates.py
index b411b494..b4eaca6a 100644
--- a/tools/import-normalizer/dates.py
+++ b/tools/import-normalizer/dates.py
@@ -157,7 +157,8 @@ def _match_monthname_a(s):
     return _build_day_month_year(int(m.group(1)), _lookup_month(m.group(2)), expand_year(m.group(3)))
 
 
-_MONTH_B_RE = re.compile(r"([A-Za-zÄÖÜäöü]+)\.?\s*(\d{1,2})\.?\s*(\d{2,4})")
+# dot after day is REQUIRED so this can't match "Mai 1895" (MONTH YYYY) as day=18
+_MONTH_B_RE = re.compile(r"([A-Za-zÄÖÜäöü]+)\.?\s*(\d{1,2})\.\s*(\d{2,4})")
 
 
 def _match_monthname_b(s):
diff --git a/tools/import-normalizer/tests/test_dates.py b/tools/import-normalizer/tests/test_dates.py
index b0953d24..a08b6b61 100644
--- a/tools/import-normalizer/tests/test_dates.py
+++ b/tools/import-normalizer/tests/test_dates.py
@@ -81,7 +81,7 @@ def test_parse_monthname_day_first():
 
 def test_parse_month_year_year_only():
     assert dates.parse_date("Mai 1895") == dates.ParsedDate("1895-05-01", Precision.MONTH, "Mai 1895")
-    assert dates.parse_date("October 1903").iso == "1903-10-01"
+    assert dates.parse_date("October 1903") == dates.ParsedDate("1903-10-01", Precision.MONTH, "October 1903")
     assert dates.parse_date("1905") == dates.ParsedDate("1905-01-01", Precision.YEAR, "1905")
 
 def test_parse_feast_and_season_via_parse_date():