"""Normalize Personendatei 2.xlsx into canonical-persons-tree.json."""

import argparse
import datetime
import json
import re
import sys
from pathlib import Path

import config
import dates
from persons import _strip_accents


def _parse_year(raw: str | None) -> int | None:
    """Return the year found in an Excel cell value, or ``None``.

    Resolution order:

    * ``None`` or a value that is blank after stripping yields ``None``.
    * The stripped text is handed to ``dates.parse_date()``. When the result
      carries an ISO string whose first four characters form a year within
      1700-2100, that year is returned.
    * Otherwise, text consisting solely of digits is read as an Excel serial
      day count (accepted range 1-80000, counted from 1899-12-30), and the
      year of the resulting date is returned when it lies within 1700-2100.
    * Everything else yields ``None``.
    """
    if raw is None:
        return None

    text = str(raw).strip()
    if not text:
        return None

    def plausible(candidate: int) -> bool:
        # Shared sanity window for both the parsed and the serial-derived year.
        return 1700 <= candidate <= 2100

    # First attempt: the general-purpose date parser.
    parsed = dates.parse_date(text)
    if parsed.iso:
        iso_year = int(parsed.iso[:4])
        if plausible(iso_year):
            return iso_year

    # Second attempt: an all-digit value that the parser could not turn into
    # a plausible year (e.g. "7568") is treated as an Excel serial number.
    if re.fullmatch(r"\d+", text) is None:
        return None

    serial = int(text)
    if not 1 <= serial <= 80_000:
        return None

    converted = datetime.date(1899, 12, 30) + datetime.timedelta(days=serial)
    return converted.year if plausible(converted.year) else None