feat(nlp-service): date range extraction with direction detection
This commit is contained in:
@@ -182,3 +182,72 @@ def test_role_receiver_to_english(nlp_en):
|
||||
doc = _make_doc_with_ents(nlp_en, "letters to Marie", [(11, 16, "PER")])
|
||||
per_spans = list(doc.ents)
|
||||
assert detect_person_role(doc, per_spans, "en") == "receiver"
|
||||
|
||||
|
||||
# ── Date parsing ─────────────────────────────────────────────────────────────
|
||||
|
||||
def test_date_vor_1920(nlp_de):
    """A German "vor <year>" query leaves the start open and ends on Dec 31."""
    from extractor import extract_dates

    query = "Briefe vor 1920"
    year_at = query.index("1920")  # 11 -> the DATE entity covers chars 11..15
    doc = _make_doc_with_ents(nlp_de, query, [(year_at, year_at + 4, "DATE")])

    lower, upper = extract_dates(doc, "de")

    assert lower is None
    assert upper == "1920-12-31"
|
||||
|
||||
|
||||
def test_date_nach_1900(nlp_de):
    """A German "nach <year>" query starts on Jan 1 and leaves the end open."""
    from extractor import extract_dates

    query = "Briefe nach 1900"
    year_at = query.index("1900")  # 12 -> the DATE entity covers chars 12..16
    doc = _make_doc_with_ents(nlp_de, query, [(year_at, year_at + 4, "DATE")])

    lower, upper = extract_dates(doc, "de")

    assert lower == "1900-01-01"
    assert upper is None
|
||||
|
||||
|
||||
def test_date_zwischen_1900_und_1920(nlp_de):
    """Two DATE entities in "zwischen X und Y" give a closed range X..Y."""
    from extractor import extract_dates

    query = "zwischen 1900 und 1920"
    # "1900" sits at chars 9..13 and "1920" at chars 18..22.
    date_ents = [
        (query.index(year), query.index(year) + len(year), "DATE")
        for year in ("1900", "1920")
    ]
    doc = _make_doc_with_ents(nlp_de, query, date_ents)

    lower, upper = extract_dates(doc, "de")

    assert lower == "1900-01-01"
    assert upper == "1920-12-31"
|
||||
|
||||
|
||||
def test_date_bare_year_makes_range(nlp_de):
    """A year with no direction word expands to that whole calendar year."""
    from extractor import extract_dates

    query = "Briefe 1920"
    year_at = query.index("1920")  # 7 -> the DATE entity covers chars 7..11
    doc = _make_doc_with_ents(nlp_de, query, [(year_at, year_at + 4, "DATE")])

    lower, upper = extract_dates(doc, "de")

    assert lower == "1920-01-01"
    assert upper == "1920-12-31"
|
||||
|
||||
|
||||
def test_date_no_date_entity(nlp_de):
    """Without any DATE entity both range bounds come back as None."""
    from extractor import extract_dates

    no_entities = []
    doc = _make_doc_with_ents(nlp_de, "Briefe von Opa", no_entities)

    lower, upper = extract_dates(doc, "de")

    assert lower is None
    assert upper is None
|
||||
|
||||
|
||||
def test_date_before_english(nlp_en):
    """An English "before <year>" query leaves the start open and ends on Dec 31."""
    from extractor import extract_dates

    query = "letters before 1920"
    year_at = query.index("1920")  # 15 -> the DATE entity covers chars 15..19
    doc = _make_doc_with_ents(nlp_en, query, [(year_at, year_at + 4, "DATE")])

    lower, upper = extract_dates(doc, "en")

    assert lower is None
    assert upper == "1920-12-31"
|
||||
|
||||
|
||||
def test_date_after_english(nlp_en):
    """An English "after <year>" query starts on Jan 1 and leaves the end open."""
    from extractor import extract_dates

    query = "letters after 1900"
    year_at = query.index("1900")  # 14 -> the DATE entity covers chars 14..18
    doc = _make_doc_with_ents(nlp_en, query, [(year_at, year_at + 4, "DATE")])

    lower, upper = extract_dates(doc, "en")

    assert lower == "1900-01-01"
    assert upper is None
|
||||
|
||||
Reference in New Issue
Block a user