feat(normalizer): roman-numeral month matcher

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-25 13:38:32 +02:00
parent b43dd6cdd4
commit 7edc002ebb
2 changed files with 25 additions and 1 deletions

View File

@@ -116,8 +116,26 @@ def _match_numeric(s):
return None return None
# Day, Roman-numeral month, year — e.g. "22.III.18" or "19.XII.1954".
# The dot after the numeral and the surrounding whitespace are optional;
# re.I lets lower-case numerals through as well.
_ROMAN_RE = re.compile(r"(\d{1,2})\.\s*([IVXLC]+)\.?\s*(\d{2,4})", re.I)


def _match_roman(s):
    """Parse a date whose month is written as a Roman numeral.

    The whole of *s* must match ``_ROMAN_RE`` (``fullmatch``), so leading or
    trailing extras cause a miss.

    Returns:
        A ``(iso_date_string, Precision.DAY)`` tuple on success, or ``None``
        when the string does not match, the numeral is not a key of
        ``config.ROMAN_MONTHS``, ``expand_year`` yields ``None``, or the
        resulting day/month/year is not a real calendar date.
    """
    hit = _ROMAN_RE.fullmatch(s)
    if hit is None:
        return None

    day_text, numeral, year_text = hit.groups()
    day = int(day_text)
    # Lookup is done on the lower-cased numeral; presumably the table keys
    # are lower-case ("iii", "xii", ...) — verify against config.
    month = config.ROMAN_MONTHS.get(numeral.lower())
    # NOTE(review): expand_year is expected to turn short years into full
    # ones and to signal failure with None — confirm against its definition.
    year = expand_year(year_text)
    if year is None or not month:
        return None

    try:
        # datetime.date rejects impossible dates (day 0, 31 April, ...).
        parsed = datetime.date(year, month, day)
    except ValueError:
        return None
    return parsed.isoformat(), Precision.DAY
# Matchers are tried in order. Later tasks append to this list. # Matchers are tried in order. Later tasks append to this list.
_MATCHERS = [_match_iso, _match_numeric] _MATCHERS = [_match_iso, _match_numeric, _match_roman]
def parse_date(raw: str, date_overrides: dict | None = None) -> ParsedDate: def parse_date(raw: str, date_overrides: dict | None = None) -> ParsedDate:

View File

@@ -63,3 +63,9 @@ def test_parse_leading_qualifier_is_approx():
r = dates.parse_date("nach 1.5.1900") # qualifier stripped, numeric date salvaged, precision APPROX r = dates.parse_date("nach 1.5.1900") # qualifier stripped, numeric date salvaged, precision APPROX
assert r.iso == "1900-05-01" assert r.iso == "1900-05-01"
assert r.precision == Precision.APPROX assert r.precision == Precision.APPROX
def test_parse_roman_months():
    """Roman-numeral month dates parse to ISO strings with DAY precision."""
    # Raw input -> expected ISO date; covers two- and four-digit years and
    # one- and two-digit days.
    expected_iso = {
        "22.III.18": "1918-03-22",
        "19.XII.1954": "1954-12-19",
        "1.III.27": "1927-03-01",
    }
    for raw, iso in expected_iso.items():
        assert dates.parse_date(raw).iso == iso

    result = dates.parse_date("22.III.18")
    assert result.precision == Precision.DAY