Files
familienarchiv/tools/import-normalizer/dates.py
2026-05-25 13:27:26 +02:00

78 lines
2.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tolerant historical date parsing for the family archive."""
import datetime
from enum import StrEnum
import config
class Precision(StrEnum):
    """Precision tag returned alongside a parsed date.

    Being a ``StrEnum`` whose values equal the member names, each member
    compares equal to the plain string of the same spelling (e.g. ``"DAY"``).
    """

    # Returned by resolve_feast_or_season for feasts and Advent Sundays that
    # resolve to one concrete calendar date.
    DAY = "DAY"
    MONTH = "MONTH"
    # Returned by resolve_feast_or_season for season tokens; the accompanying
    # ISO date is the first day of the season's configured month.
    SEASON = "SEASON"
    YEAR = "YEAR"
    # NOTE(review): RANGE, APPROX and UNKNOWN are not produced by any code in
    # this section; presumably set by callers elsewhere -- confirm.
    RANGE = "RANGE"
    APPROX = "APPROX"
    UNKNOWN = "UNKNOWN"
def _advent_sunday(year: int, n: int) -> datetime.date:
    """Return the date of the n-th Sunday of Advent (n in 1..4) in ``year``.

    The 4th Advent is the last Sunday falling on or before December 24;
    every earlier Advent Sunday lies a whole number of weeks before it.
    """
    christmas_eve = datetime.date(year, 12, 24)
    # isoweekday() runs Mon=1..Sun=7, so modulo 7 it is the number of days
    # elapsed since the most recent Sunday (0 when Dec 24 is itself a Sunday).
    days_past_sunday = christmas_eve.isoweekday() % 7
    fourth_advent = christmas_eve - datetime.timedelta(days=days_past_sunday)
    # Step back one week for each Advent Sunday preceding the fourth.
    return fourth_advent - datetime.timedelta(weeks=4 - n)
def resolve_feast_or_season(token: str, year: int):
    """Resolve a feast or season name within ``year``.

    The token is lower-cased, its internal whitespace collapsed to single
    spaces, and surrounding spaces/dots removed before lookup. Lookups are
    tried in this order: movable feasts (day offset from Easter Sunday),
    fixed feasts ((month, day) pairs), Advent Sundays, then seasons.

    Returns an ``(iso_date_string, Precision)`` tuple, or ``None`` when the
    token matches none of the tables.
    """
    lowered = token.lower()
    collapsed = " ".join(lowered.split())
    name = collapsed.strip(" .")

    # Movable feasts are stored as a signed day offset relative to Easter.
    if name in config.MOVABLE_FEASTS:
        easter_sunday = easter(year)
        offset = datetime.timedelta(days=config.MOVABLE_FEASTS[name])
        return (easter_sunday + offset).isoformat(), Precision.DAY

    # Fixed feasts fall on the same (month, day) every year.
    if name in config.FIXED_FEASTS:
        feast_month, feast_day = config.FIXED_FEASTS[name]
        feast_date = datetime.date(year, feast_month, feast_day)
        return feast_date.isoformat(), Precision.DAY

    # A bare "advent" is treated as the first Advent Sunday.
    advent_ordinals = {
        "1. advent": 1,
        "2. advent": 2,
        "3. advent": 3,
        "4. advent": 4,
        "advent": 1,
    }
    ordinal = advent_ordinals.get(name)
    if ordinal is not None:
        return _advent_sunday(year, ordinal).isoformat(), Precision.DAY

    # Seasons are anchored to the first day of their configured month.
    if name in config.SEASON_MONTHS:
        season_start = datetime.date(year, config.SEASON_MONTHS[name], 1)
        return season_start.isoformat(), Precision.SEASON

    return None
def expand_year(token: str):
"""Expand a 2/3/4-digit year string per the 18731957 century rule. None if ambiguous."""
token = token.strip()
if not token.isdigit():
return None
n, v = len(token), int(token)
if n == 4:
return v
if n == 3:
return 1000 + v
if n == 2:
if v <= config.TWO_DIGIT_19XX_MAX:
return 1900 + v
if v >= config.TWO_DIGIT_18XX_MIN:
return 1800 + v
return None
return None
def easter(year: int) -> datetime.date:
    """Compute the date of Gregorian Easter Sunday for ``year``.

    Implements the Anonymous Gregorian (Butcher) algorithm using integer
    arithmetic only.
    """
    # Position of the year within the 19-year cycle.
    cycle_pos = year % 19
    century, year_of_century = divmod(year, 100)
    century_quads, century_rest = divmod(century, 4)
    corr = (century + 8) // 25
    corr_third = (century - corr + 1) // 3
    h_term = (
        19 * cycle_pos + century - century_quads - corr_third + 15
    ) % 30
    yoc_quads, yoc_rest = divmod(year_of_century, 4)
    l_term = (
        32 + 2 * century_rest + 2 * yoc_quads - h_term - yoc_rest
    ) % 7
    m_term = (cycle_pos + 11 * h_term + 22 * l_term) // 451
    # The combined term encodes the month in its quotient by 31 and the
    # zero-based day of month in its remainder.
    month, day_index = divmod(h_term + l_term - 7 * m_term + 114, 31)
    return datetime.date(year, month, day_index + 1)