feat(nlp-service): role detection (sender/receiver/any)
This commit is contained in:
@@ -117,3 +117,68 @@ def test_extract_person_names_ignores_non_per(nlp_de):
|
||||
# DATE entity should not appear in personNames
|
||||
doc = _make_doc_with_ents(nlp_de, "Briefe 1920", [(7, 11, "DATE")])
|
||||
assert extract_person_names(doc) == []
|
||||
|
||||
|
||||
# ── Role detection ───────────────────────────────────────────────────────────
|
||||
|
||||
def test_role_sender_von(nlp_de):
    """Expect role "sender" for a single PER entity preceded by "von"."""
    from extractor import detect_person_role

    query = "Briefe von Marie"
    # "Marie" covers characters 11..16 and directly follows "von".
    marie = (11, 16, "PER")
    doc = _make_doc_with_ents(nlp_de, query, [marie])

    persons = list(doc.ents)
    role = detect_person_role(doc, persons, "de")
    assert role == "sender"
|
||||
|
||||
|
||||
def test_role_receiver_an(nlp_de):
    """Expect role "receiver" for a single PER entity preceded by "an"."""
    from extractor import detect_person_role

    query = "Briefe an Marie"
    # "Marie" covers characters 10..15 and directly follows "an".
    marie = (10, 15, "PER")
    doc = _make_doc_with_ents(nlp_de, query, [marie])

    persons = list(doc.ents)
    role = detect_person_role(doc, persons, "de")
    assert role == "receiver"
|
||||
|
||||
|
||||
def test_role_two_persons_returns_any(nlp_de):
    """Expect role "any" when the query carries two PER entities."""
    from extractor import detect_person_role

    query = "von Opa an Marie"
    # "Opa" covers characters 4..7, "Marie" covers characters 11..16.
    opa = (4, 7, "PER")
    marie = (11, 16, "PER")
    doc = _make_doc_with_ents(nlp_de, query, [opa, marie])

    persons = list(doc.ents)
    role = detect_person_role(doc, persons, "de")
    assert role == "any"
|
||||
|
||||
|
||||
def test_role_no_prep_returns_any(nlp_de):
    """Expect role "any" when no preposition precedes the PER entity."""
    from extractor import detect_person_role

    query = "Briefe Marie"
    # "Marie" covers characters 7..12; the query has no preposition.
    marie = (7, 12, "PER")
    doc = _make_doc_with_ents(nlp_de, query, [marie])

    persons = list(doc.ents)
    role = detect_person_role(doc, persons, "de")
    assert role == "any"
|
||||
|
||||
|
||||
def test_role_empty_returns_any(nlp_de):
    """Expect role "any" when no person spans are passed in."""
    from extractor import detect_person_role

    # The doc is built without any entities, and an empty span list is passed.
    doc = _make_doc_with_ents(nlp_de, "Briefe 1920", [])

    role = detect_person_role(doc, [], "de")
    assert role == "any"
|
||||
|
||||
|
||||
def test_role_sender_from_english(nlp_en):
    """Expect role "sender" for an English query with "from" before the PER entity."""
    from extractor import detect_person_role

    query = "letters from Marie"
    # "Marie" covers characters 13..18 and directly follows "from".
    marie = (13, 18, "PER")
    doc = _make_doc_with_ents(nlp_en, query, [marie])

    persons = list(doc.ents)
    role = detect_person_role(doc, persons, "en")
    assert role == "sender"
|
||||
|
||||
|
||||
def test_role_receiver_to_english(nlp_en):
    """Expect role "receiver" for an English query with "to" before the PER entity."""
    from extractor import detect_person_role

    query = "letters to Marie"
    # Layout: "letters" = 0..7, "to" = 8..10, "Marie" = 11..16.
    marie = (11, 16, "PER")
    doc = _make_doc_with_ents(nlp_en, query, [marie])

    persons = list(doc.ents)
    role = detect_person_role(doc, persons, "en")
    assert role == "receiver"
|
||||
|
||||
Reference in New Issue
Block a user