feat(normalizer): month/year, feast/season, range matchers + overrides
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -78,3 +78,52 @@ def test_parse_monthname_day_first():
|
||||
assert dates.parse_date("18.Dez.1916").iso == "1916-12-18"
|
||||
assert dates.parse_date("4Dezember 1936").iso == "1936-12-04"
|
||||
assert dates.parse_date("25 August 1968").iso == "1968-08-25"
|
||||
|
||||
def test_parse_month_year_year_only():
    """Month+year and bare-year input parse to the first day of the period.

    The expected precision is MONTH for "month year" strings and YEAR for a
    lone year; the raw input must be carried through unchanged.
    """
    month_and_year = "Mai 1895"
    parsed_month = dates.parse_date(month_and_year)
    assert parsed_month == dates.ParsedDate("1895-05-01", Precision.MONTH, month_and_year)

    english_month = dates.parse_date("October 1903")
    assert english_month.iso == "1903-10-01"

    bare_year = "1905"
    parsed_year = dates.parse_date(bare_year)
    assert parsed_year == dates.ParsedDate("1905-01-01", Precision.YEAR, bare_year)
|
||||
|
||||
def test_parse_feast_and_season_via_parse_date():
    """Feast-day and season names followed by a year are handled by parse_date."""
    # (raw input, expected ISO date, expected precision)
    exact_cases = (
        ("Pfingsten 1922", "1922-06-04", Precision.DAY),
        ("Herbst 1913", "1913-10-01", Precision.SEASON),
    )
    for raw, expected_iso, expected_precision in exact_cases:
        assert dates.parse_date(raw) == dates.ParsedDate(expected_iso, expected_precision, raw)

    # For this variant only the precision is pinned, not the concrete date.
    feast_sunday = dates.parse_date("Pfingstsonntag 1915")
    assert feast_sunday.precision == Precision.DAY
|
||||
|
||||
def test_parse_ranges():
    """Date ranges parse to their start date with RANGE precision.

    A trailing '?' on a range is expected to end up as APPROX instead.
    """
    # (raw input, expected ISO date of the range start)
    range_cases = (
        ("8.1.1916 - 15.3.1916", "1916-01-08"),
        ("1881/82", "1881-01-01"),
    )
    for raw, start_iso in range_cases:
        assert dates.parse_date(raw) == dates.ParsedDate(start_iso, Precision.RANGE, raw)

    uncertain_range = "1945/46?"
    # '?' stripped -> RANGE, then APPROX
    assert dates.parse_date(uncertain_range).iso == "1945-01-01"
    assert dates.parse_date(uncertain_range).precision == Precision.APPROX
|
||||
|
||||
def test_parse_approx_full():
    """A '(?)' marker inside a full date keeps the date but yields APPROX."""
    parsed = dates.parse_date("17.Nov (?) 1887")
    observed = (parsed.iso, parsed.precision)
    assert observed == ("1887-11-17", Precision.APPROX)
|
||||
|
||||
def test_parse_english_month_first_now_works():
    """English month-first dates parse, and plain month/year input still works."""
    month_first = dates.parse_date("April 12. 1922")
    assert month_first.iso == "1922-04-12"

    # not shadowed by month-first matcher
    month_year = dates.parse_date("Mai 1895")
    assert month_year.iso == "1895-05-01"
|
||||
|
||||
def test_parse_unparseable_examples():
    """Input without a recognisable date is reported with UNKNOWN precision."""
    outcome = dates.parse_date("Freitag 1919")
    assert outcome.precision == Precision.UNKNOWN
|
||||
|
||||
def test_parse_invalid_calendar_date_is_unknown():
    """Calendar dates that cannot exist must come back as UNKNOWN."""
    # try/except ValueError in the matchers must route impossible dates to UNKNOWN (-> review),
    # never silently clamp. This is the most likely real-data bug class at 7,600 rows.
    impossible_dates = ("30.2.1888", "31.4.1916")
    for raw in impossible_dates:
        assert dates.parse_date(raw).precision == Precision.UNKNOWN
|
||||
|
||||
def test_parse_intra_month_day_range():
    """A 'd./d. month year' day range yields its start day with RANGE precision."""
    # "7./8. Sept.1923" -> start day, RANGE. Must NOT be confused with slash-date "17/6. 1916".
    day_range = "7./8. Sept.1923"
    slash_date = "17/6. 1916"

    parsed_range = dates.parse_date(day_range)
    assert parsed_range == dates.ParsedDate("1923-09-07", Precision.RANGE, day_range)

    parsed_slash = dates.parse_date(slash_date)
    assert parsed_slash == dates.ParsedDate("1916-06-17", Precision.DAY, slash_date)
|
||||
|
||||
def test_parse_trailing_note_stripped_but_raw_preserved():
    """REQ-DATE-04: a trailing note is ignored for parsing but kept in ``raw``."""
    with_note = "17.Nov 1887, 2. Brief"
    parsed = dates.parse_date(with_note)  # REQ-DATE-04
    assert parsed.iso == "1887-11-17"
    # original string preserved verbatim
    assert "2. Brief" in parsed.raw
|
||||
|
||||
def test_parse_date_override_wins():
    """An entry in the override mapping determines the result for its raw string."""
    raw = "13.5.65"
    # Override value is an (iso, precision-name) pair keyed by the raw input.
    overrides = {raw: ("1965-05-13", "DAY")}
    parsed = dates.parse_date(raw, overrides)  # ambiguous without override
    assert parsed == dates.ParsedDate("1965-05-13", Precision.DAY, raw)
|
||||
|
||||
Reference in New Issue
Block a user