Import normalizer: offline tool to normalize the raw archive spreadsheets #663

Merged
marcel merged 172 commits from docs/import-migration into main 2026-05-28 15:05:51 +02:00
2 changed files with 3 additions and 12 deletions
Showing only changes of commit b43dd6cdd4 - Show all commits

View File

@@ -116,17 +116,8 @@ def _match_numeric(s):
return None return None
# Exactly four digits; used with fullmatch so nothing else may surround them.
_YEAR_ONLY_RE = re.compile(r"\d{4}")


def _match_year_only(s):
    """Match a bare four-digit year such as ``"1900"``.

    Returns a ``(iso_date, Precision.YEAR)`` tuple whose date is January 1
    of the matched year, or ``None`` when *s* is not a four-digit year or
    the year cannot be represented by ``datetime.date``.
    """
    if not _YEAR_ONLY_RE.fullmatch(s):
        return None
    try:
        iso = datetime.date(int(s), 1, 1).isoformat()
    except ValueError:
        # "0000" passes the regex but lies below datetime.MINYEAR; treat it
        # as "no match" instead of letting the exception escape the matcher.
        return None
    return iso, Precision.YEAR
# Matchers are tried in order. Later tasks append to this list. # Matchers are tried in order. Later tasks append to this list.
_MATCHERS = [_match_iso, _match_numeric, _match_year_only] _MATCHERS = [_match_iso, _match_numeric]
def parse_date(raw: str, date_overrides: dict | None = None) -> ParsedDate: def parse_date(raw: str, date_overrides: dict | None = None) -> ParsedDate:

View File

@@ -60,6 +60,6 @@ def test_parse_approx_marker_upgrades_precision():
assert r.precision == Precision.UNKNOWN # no month-name matcher until Task 7; full APPROX check in Task 8 assert r.precision == Precision.UNKNOWN # no month-name matcher until Task 7; full APPROX check in Task 8
def test_parse_leading_qualifier_is_approx(): def test_parse_leading_qualifier_is_approx():
r = dates.parse_date("nach 1900") # "after 1900" -> year salvaged, but precision is APPROX not exact r = dates.parse_date("nach 1.5.1900") # qualifier stripped, numeric date salvaged, precision APPROX
assert r.iso == "1900-01-01" assert r.iso == "1900-05-01"
assert r.precision == Precision.APPROX assert r.precision == Precision.APPROX