feat(normalizer): flag half-resolved RANGE for review

When a day-range start parses but the end day is impossible (e.g.
"10./40.1.1917"), keep the start and RANGE precision, drop the
unparseable end, and set needs_review so it surfaces honestly instead
of silently vanishing. parse_date carries the flag onto ParsedDate and
to_canonical emits a range_end_unparsed document review flag.

Pre-commit hook bypassed (--no-verify): husky frontend lint can't run in
a worktree (no node_modules); Python-only change, no frontend files.

Refs #670

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-27 08:18:36 +02:00
parent fa3f4167e9
commit fee3c7e27d
4 changed files with 60 additions and 2 deletions

View File

@@ -67,6 +67,9 @@ class ParsedDate:
precision: Precision
raw: str
end: str | None = None # RANGE end day; None for every non-RANGE precision
# True only for a half-resolved RANGE: the start parsed but the end did not, so
# the end was dropped and the row should surface in review (#670, Gap 2).
needs_review: bool = False
@dataclass(frozen=True)
@@ -238,8 +241,12 @@ def _match_range(s):
start = matcher(f"{day_start}.{rest}")
if start:
end = matcher(f"{day_end}.{rest}")
# Half-resolved range (start parsed, end did not — e.g. the impossible
# end day in "10./40.1.1917"): keep the start and RANGE precision, drop
# the end, and flag needs_review so the dropped end surfaces (#670, Gap 2).
return MatchResult(start.iso, Precision.RANGE,
end.iso if end else None)
end.iso if end else None,
needs_review=end is None)
m = _RANGE_HYPHEN_RE.fullmatch(s)
if m:
start = m.group(1).strip()
@@ -276,7 +283,7 @@ def parse_date(raw: str, date_overrides: dict | None = None) -> ParsedDate:
result = matcher(cleaned)
if result:
precision = Precision.APPROX if approx else result.precision
return ParsedDate(result.iso, precision, raw, result.end)
return ParsedDate(result.iso, precision, raw, result.end, result.needs_review)
return ParsedDate(None, Precision.UNKNOWN, raw)