"""Tolerant historical date parsing for the family archive.""" import datetime import re from dataclasses import dataclass from enum import StrEnum import config class Precision(StrEnum): DAY = "DAY" MONTH = "MONTH" SEASON = "SEASON" YEAR = "YEAR" RANGE = "RANGE" APPROX = "APPROX" UNKNOWN = "UNKNOWN" def _advent_sunday(year: int, n: int) -> datetime.date: """n-th Advent (1..4). 4th Advent = last Sunday on/before Dec 24.""" dec24 = datetime.date(year, 12, 24) back_to_sunday = (dec24.weekday() - 6) % 7 # Mon=0..Sun=6 fourth = dec24 - datetime.timedelta(days=back_to_sunday) return fourth - datetime.timedelta(days=(4 - n) * 7) def resolve_feast_or_season(token: str, year: int): """Return (iso, Precision) for a known feast/season token, else None.""" key = " ".join(token.lower().split()).strip(" .") if key in config.MOVABLE_FEASTS: d = easter(year) + datetime.timedelta(days=config.MOVABLE_FEASTS[key]) return d.isoformat(), Precision.DAY if key in config.FIXED_FEASTS: month, day = config.FIXED_FEASTS[key] return datetime.date(year, month, day).isoformat(), Precision.DAY advent = {"1. advent": 1, "2. advent": 2, "3. advent": 3, "4. advent": 4, "advent": 1} if key in advent: return _advent_sunday(year, advent[key]).isoformat(), Precision.DAY if key in config.SEASON_MONTHS: return datetime.date(year, config.SEASON_MONTHS[key], 1).isoformat(), Precision.SEASON return None def expand_year(token: str): """Expand a 2/3/4-digit year string per the 1873–1957 century rule. 
None if ambiguous.""" token = token.strip() if not token.isdigit(): return None n, v = len(token), int(token) if n == 4: return v if n == 3: return 1000 + v if n == 2: if v <= config.TWO_DIGIT_19XX_MAX: return 1900 + v if v >= config.TWO_DIGIT_18XX_MIN: return 1800 + v return None return None @dataclass(frozen=True) class ParsedDate: iso: str | None precision: Precision raw: str _LEADING_MARKERS = re.compile( r"^(um|ca\.?|circa|etwa|wohl|vermutlich|nach|vor|anfang|mitte|ende)\s+", re.I) def _preprocess(raw: str): """Return (cleaned_string, approx_flag). Any uncertainty/qualifier marker -> approx.""" s = (raw or "").strip() if not s: return "", False low = s.lower() approx = ("?" in s) or any( m in low for m in ("um ", "ca.", "ca ", "circa", "etwa", "wohl", "vermutlich")) s = re.sub(r"\(\s*\?\s*\)", " ", s) # remove "(?)" s = s.replace("?", " ") s = re.sub(r",.*$", "", s) # drop trailing editorial note (", 2. Brief") stripped = _LEADING_MARKERS.sub("", s) if stripped != s: # a leading qualifier (um/ca/nach/vor/anfang/…) signals approximation approx = True s = re.sub(r"\s+", " ", stripped).strip(" .,") return s, approx _NUM_RE = re.compile(r"(\d{1,2})[./](\d{1,2})\.?\s*(\d{2,4})") def _match_iso(s): if re.fullmatch(r"\d{4}-\d{2}-\d{2}", s): try: datetime.date.fromisoformat(s) return s, Precision.DAY except ValueError: return None return None def _match_numeric(s): m = _NUM_RE.fullmatch(s) if not m: return None day, month = int(m.group(1)), int(m.group(2)) year = expand_year(m.group(3)) if year is None or not (1 <= month <= 12): return None try: return datetime.date(year, month, day).isoformat(), Precision.DAY except ValueError: return None _YEAR_ONLY_RE = re.compile(r"\d{4}") def _match_year_only(s): if _YEAR_ONLY_RE.fullmatch(s): return datetime.date(int(s), 1, 1).isoformat(), Precision.YEAR return None # Matchers are tried in order. Later tasks append to this list. 
_MATCHERS = [_match_iso, _match_numeric, _match_year_only]


def parse_date(raw: str, date_overrides: dict | None = None) -> ParsedDate:
    """Parse one raw archive date string into a ParsedDate.

    ``date_overrides`` maps a whitespace-stripped raw string to an
    ``(iso, precision_value)`` pair; a hit there wins over all parsing.
    Otherwise the string is preprocessed and offered to each matcher in
    ``_MATCHERS`` in turn; the first truthy result is used. If preprocessing
    flagged the input as approximate, the precision becomes
    ``Precision.APPROX``. Empty or unmatched input gives ``Precision.UNKNOWN``
    with ``iso=None``. ``raw`` is always stored unchanged.
    """
    if date_overrides and (lookup := (raw or "").strip()) in date_overrides:
        forced_iso, forced_precision = date_overrides[lookup]
        return ParsedDate(forced_iso or None, Precision(forced_precision), raw)

    text, is_approx = _preprocess(raw)
    if not text:
        return ParsedDate(None, Precision.UNKNOWN, raw)

    # Lazily evaluate the matchers and stop at the first truthy result.
    first_hit = next(filter(None, (match(text) for match in _MATCHERS)), None)
    if first_hit is None:
        return ParsedDate(None, Precision.UNKNOWN, raw)

    iso, matched_precision = first_hit
    final_precision = Precision.APPROX if is_approx else matched_precision
    return ParsedDate(iso, final_precision, raw)


def easter(year: int) -> datetime.date:
    """Easter Sunday (Gregorian) via the Anonymous Gregorian / Butcher algorithm."""
    cycle_pos = year % 19
    century, year_of_century = divmod(year, 100)
    century_quads, century_rem = divmod(century, 4)
    f_term = (century + 8) // 25
    g_term = (century - f_term + 1) // 3
    h_term = (19 * cycle_pos + century - century_quads - g_term + 15) % 30
    yoc_quads, yoc_rem = divmod(year_of_century, 4)
    l_term = (32 + 2 * century_rem + 2 * yoc_quads - h_term - yoc_rem) % 7
    m_term = (cycle_pos + 11 * h_term + 22 * l_term) // 451
    # One combined value encodes both the month (quotient) and day-1 (remainder).
    month, day_minus_one = divmod(h_term + l_term - 7 * m_term + 114, 31)
    return datetime.date(year, month, day_minus_one + 1)