diff --git a/tools/import-normalizer/dates.py b/tools/import-normalizer/dates.py index d1dc81aa..37d39c58 100644 --- a/tools/import-normalizer/dates.py +++ b/tools/import-normalizer/dates.py @@ -69,6 +69,20 @@ class ParsedDate: end: str | None = None # RANGE end day; None for every non-RANGE precision +@dataclass(frozen=True) +class MatchResult: + """Uniform return shape for every _match_* matcher. + + A matcher returns None when it does not match, or a MatchResult when it does. + `end` is the RANGE end day; None for non-RANGE precisions and for a RANGE with no resolved end. + `needs_review` marks a RANGE whose start parsed but end did not. NOTE(review): no matcher shown in this patch sets it -- confirm. + """ + iso: str + precision: Precision + end: str | None = None + needs_review: bool = False + + _LEADING_MARKERS = re.compile( r"^(um|ca\.?|circa|etwa|wohl|vermutlich|nach|vor|anfang|mitte|ende)\s+", re.I) @@ -98,7 +112,7 @@ def _match_iso(s): if re.fullmatch(r"\d{4}-\d{2}-\d{2}", s): try: datetime.date.fromisoformat(s) - return s, Precision.DAY + return MatchResult(s, Precision.DAY) except ValueError: return None return None @@ -113,7 +127,7 @@ def _match_numeric(s): if year is None or not (1 <= month <= 12): return None try: - return datetime.date(year, month, day).isoformat(), Precision.DAY + return MatchResult(datetime.date(year, month, day).isoformat(), Precision.DAY) except ValueError: return None @@ -131,7 +145,7 @@ def _match_roman(s): if not month or year is None: return None try: - return datetime.date(year, month, day).isoformat(), Precision.DAY + return MatchResult(datetime.date(year, month, day).isoformat(), Precision.DAY) except ValueError: return None @@ -147,7 +161,7 @@ def _build_day_month_year(day, month, year): if not month or year is None or not (1 <= month <= 12): return None try: - return datetime.date(year, month, day).isoformat(), Precision.DAY + return MatchResult(datetime.date(year, month, day).isoformat(), Precision.DAY) except ValueError: return None @@ -189,7 +203,7 @@ def _match_month_year(s): year = 
expand_year(m.group(2)) if not month or year is None: return None - return datetime.date(year, month, 1).isoformat(), Precision.MONTH + return MatchResult(datetime.date(year, month, 1).isoformat(), Precision.MONTH) def _match_feast_season(s): @@ -199,19 +213,23 @@ def _match_feast_season(s): year = expand_year(m.group(2)) if year is None: return None - return resolve_feast_or_season(m.group(1), year) + resolved = resolve_feast_or_season(m.group(1), year) + if resolved is None: + return None + iso, precision = resolved + return MatchResult(iso, precision) def _match_year_only(s): if _YEAR_ONLY_RE.fullmatch(s): - return datetime.date(int(s), 1, 1).isoformat(), Precision.YEAR + return MatchResult(datetime.date(int(s), 1, 1).isoformat(), Precision.YEAR) return None def _match_range(s): m = _RANGE_YY_RE.fullmatch(s) if m: - return datetime.date(int(m.group(1)), 1, 1).isoformat(), Precision.RANGE, None + return MatchResult(datetime.date(int(m.group(1)), 1, 1).isoformat(), Precision.RANGE) m = _RANGE_DAY_RE.fullmatch(s) if m: day_start, day_end, rest = m.group(1), m.group(2), m.group(3) @@ -220,14 +238,15 @@ def _match_range(s): start = matcher(f"{day_start}.{rest}") if start: end = matcher(f"{day_end}.{rest}") - return start[0], Precision.RANGE, (end[0] if end else None) + return MatchResult(start.iso, Precision.RANGE, + end.iso if end else None) m = _RANGE_HYPHEN_RE.fullmatch(s) if m: start = m.group(1).strip() for matcher in (_match_numeric, _match_roman, _match_monthname_a, _match_year_only): r = matcher(start) if r: - return r[0], Precision.RANGE, None + return MatchResult(r.iso, Precision.RANGE) return None @@ -256,11 +275,8 @@ def parse_date(raw: str, date_overrides: dict | None = None) -> ParsedDate: for matcher in _MATCHERS: result = matcher(cleaned) if result: - iso, precision = result[0], result[1] - end = result[2] if len(result) > 2 else None - if approx: - precision = Precision.APPROX - return ParsedDate(iso, precision, raw, end) + precision = 
Precision.APPROX if approx else result.precision + return ParsedDate(result.iso, precision, raw, result.end) return ParsedDate(None, Precision.UNKNOWN, raw) diff --git a/tools/import-normalizer/tests/test_dates.py b/tools/import-normalizer/tests/test_dates.py index b380c7c7..bffb848c 100644 --- a/tools/import-normalizer/tests/test_dates.py +++ b/tools/import-normalizer/tests/test_dates.py @@ -2,6 +2,18 @@ import datetime import dates from dates import Precision +def test_matchers_return_uniform_matchresult(): + # Every matcher returns a MatchResult(iso, precision, end) — no 2- vs 3-tuple + # length-sniffing. A non-range matcher leaves end=None; _match_range sets it when the end day parses. + day = dates._match_numeric("15.2.1888") + assert isinstance(day, dates.MatchResult) + assert (day.iso, day.precision, day.end) == ("1888-02-15", Precision.DAY, None) + + rng = dates._match_range("10./11.1.1917") + assert isinstance(rng, dates.MatchResult) + assert (rng.iso, rng.precision, rng.end) == ("1917-01-10", Precision.RANGE, "1917-01-11") + + def test_easter_known_years(): # Anonymous Gregorian algorithm — verified against published tables assert dates.easter(2024) == datetime.date(2024, 3, 31)