feat(normalizer): roman-numeral month matcher
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -116,8 +116,26 @@ def _match_numeric(s):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# Day, roman-numeral month, then a 2-4 digit year (e.g. "22.III.18").
# Matching is case-insensitive; the dot after the numeral is optional.
_ROMAN_RE = re.compile(
    r"(\d{1,2})\.\s*([IVXLC]+)\.?\s*(\d{2,4})",
    re.IGNORECASE,
)
|
||||||
|
|
||||||
|
|
||||||
|
def _match_roman(s):
    """Match a ``day.ROMAN-month.year`` date such as ``22.III.18``.

    Returns an ``(iso_string, Precision.DAY)`` tuple on success.  Returns
    ``None`` when ``s`` does not fully match ``_ROMAN_RE``, when the
    lower-cased numeral has no truthy entry in ``config.ROMAN_MONTHS``,
    when ``expand_year`` returns ``None``, or when ``datetime.date``
    rejects the resulting year/month/day with ``ValueError``.
    """
    found = _ROMAN_RE.fullmatch(s)
    if found is None:
        return None

    day_text, numeral, year_text = found.groups()
    day = int(day_text)
    # The lookup table is keyed by lower-case numerals.
    month = config.ROMAN_MONTHS.get(numeral.lower())
    year = expand_year(year_text)
    if year is None or not month:
        return None

    try:
        parsed = datetime.date(year, month, day)
    except ValueError:
        return None
    else:
        return parsed.isoformat(), Precision.DAY
|
||||||
|
|
||||||
|
|
||||||
# Matchers are tried in list order. Later tasks append to this list.
_MATCHERS = [
    _match_iso,
    _match_numeric,
    _match_roman,
]
|
||||||
|
|
||||||
|
|
||||||
def parse_date(raw: str, date_overrides: dict | None = None) -> ParsedDate:
|
def parse_date(raw: str, date_overrides: dict | None = None) -> ParsedDate:
|
||||||
|
|||||||
@@ -63,3 +63,9 @@ def test_parse_leading_qualifier_is_approx():
|
|||||||
r = dates.parse_date("nach 1.5.1900") # qualifier stripped, numeric date salvaged, precision APPROX
|
r = dates.parse_date("nach 1.5.1900") # qualifier stripped, numeric date salvaged, precision APPROX
|
||||||
assert r.iso == "1900-05-01"
|
assert r.iso == "1900-05-01"
|
||||||
assert r.precision == Precision.APPROX
|
assert r.precision == Precision.APPROX
|
||||||
|
|
||||||
|
def test_parse_roman_months():
    # Raw input -> ISO date expected from parse_date for roman-numeral months.
    expected_iso = {
        "22.III.18": "1918-03-22",
        "19.XII.1954": "1954-12-19",
        "1.III.27": "1927-03-01",
    }
    for raw, iso in expected_iso.items():
        assert dates.parse_date(raw).iso == iso

    # A fully specified roman-numeral date is reported at day precision.
    assert dates.parse_date("22.III.18").precision == Precision.DAY
|
||||||
|
|||||||
Reference in New Issue
Block a user