feat(normalizer): day-first month-name matcher

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-25 13:42:36 +02:00
parent 7edc002ebb
commit 4942c0ea07
2 changed files with 35 additions and 3 deletions

View File

@@ -134,8 +134,31 @@ def _match_roman(s):
return None
# Day-first date with a spelled-out month, shaped like "25. Mai 2026".
#   group 1: day, one or two digits
#   then any run of dots and/or whitespace (may be empty)
#   group 2: month token, one or more ASCII letters or German umlauts
#   then an optional dot and optional whitespace
#   group 3: year, two to four digits
_MONTH_A_RE = re.compile(r"(\d{1,2})[.\s]*([A-Za-zÄÖÜäöü]+)\.?\s*(\d{2,4})")
def _lookup_month(token: str):
    """Look up a month token in ``config.MONTHS``.

    The token is lower-cased and has leading/trailing spaces and dots
    removed before the lookup, so a token such as ``"Jan."`` is looked up
    under the key ``"jan"``.

    Returns whatever ``config.MONTHS.get`` yields for the normalized key
    (``None`` on a miss, assuming standard mapping semantics).
    """
    normalized = token.lower().strip(" .")
    return config.MONTHS.get(normalized)
def _build_day_month_year(day, month, year):
if not month or year is None or not (1 <= month <= 12):
return None
try:
return datetime.date(year, month, day).isoformat(), Precision.DAY
except ValueError:
return None
def _match_monthname_a(s):
    """Match a day-first date whose month is written as a word.

    The whole string must match ``_MONTH_A_RE``. On a match, the day is
    converted with ``int``, the month token is resolved via
    ``_lookup_month`` and the year via ``expand_year``; the three values
    are then handed to ``_build_day_month_year``, whose result is returned.

    Returns ``None`` when the string does not match the pattern.
    """
    found = _MONTH_A_RE.fullmatch(s)
    if found is None:
        return None

    day_text, month_text, year_text = found.groups()
    return _build_day_month_year(
        int(day_text),
        _lookup_month(month_text),
        expand_year(year_text),
    )
# Matchers are tried in order. Later tasks append to this list.
# NOTE(review): the first assignment below is immediately overwritten by the
# second, which adds _match_monthname_a at the end. This looks like the
# before/after pair of a diff; confirm that only the second line is meant to
# remain.
_MATCHERS = [_match_iso, _match_numeric, _match_roman]
_MATCHERS = [_match_iso, _match_numeric, _match_roman, _match_monthname_a]
def parse_date(raw: str, date_overrides: dict | None = None) -> ParsedDate: