feat(normalizer): flag half-resolved RANGE for review

When a day-range start parses but the end day is impossible (e.g.
"10./40.1.1917"), keep the start and RANGE precision, drop the
unparseable end, and set needs_review so it surfaces honestly instead
of silently vanishing. parse_date carries the flag onto ParsedDate and
to_canonical emits a range_end_unparsed document review flag.

Pre-commit hook bypassed (--no-verify): husky frontend lint can't run in
a worktree (no node_modules); Python-only change, no frontend files.

Refs #670

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-27 08:18:36 +02:00
parent fa3f4167e9
commit fee3c7e27d
4 changed files with 60 additions and 2 deletions

View File

@@ -82,6 +82,29 @@ def test_to_canonical_non_range_has_empty_date_end():
assert doc.date_precision == "DAY"
assert doc.date_end == ""
def test_to_canonical_half_resolved_range_flags_review():
    """A day range with an impossible end day is kept but flagged for review.

    For "10./40.1.1917" the start date and RANGE precision survive while the
    unparseable end is dropped; the document must carry the
    ``range_end_unparsed`` review flag so the importer (#669) knows that an
    empty ``date_end`` on a RANGE row is by design.
    """
    context = _ctx()
    row = documents.RawRow(
        source_row=5,
        index="H-0731",
        sender="",
        receivers="",
        date="10./40.1.1917",
    )

    canonical = documents.to_canonical(row, context, date_overrides={})

    # The start half of the range is still resolved.
    assert canonical.date_iso == "1917-01-10"
    assert canonical.date_precision == "RANGE"
    # The end half is dropped and surfaced as a review flag.
    assert canonical.date_end == ""
    assert "range_end_unparsed" in canonical.needs_review
def test_to_canonical_full_range_not_flagged():
    """A day range whose end also parses gets an end date and no review flag."""
    context = _ctx()
    row = documents.RawRow(
        source_row=5,
        index="H-0730",
        sender="",
        receivers="",
        date="10./11.1.1917",
    )

    canonical = documents.to_canonical(row, context, date_overrides={})

    assert canonical.date_end == "1917-01-11"
    assert "range_end_unparsed" not in canonical.needs_review
def test_to_canonical_unmatched_and_unparsed():
ctx = _ctx()
raw = documents.RawRow(source_row=9, index="C-0001",