feat(nlp-service): spaCy model loading with get_nlp/load_all_models
This commit is contained in:
33
nlp-service/extractor.py
Normal file
33
nlp-service/extractor.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from datetime import date
|
||||
|
||||
import dateparser
|
||||
import spacy
|
||||
from spacy.language import Language
|
||||
|
||||
from models import ParseResponse
|
||||
|
||||
# ── Language model registry ──────────────────────────────────────────────────
|
||||
|
||||
# Supported language codes mapped to the spaCy pipeline package loaded for each.
# Iteration order of this dict is the order load_all_models() walks the codes,
# and the order they are listed in get_nlp()'s "Unsupported language" error.
_MODEL_NAMES: dict[str, str] = {
    "de": "de_core_news_sm",
    "en": "en_core_web_sm",
    "es": "es_core_news_sm",
}
|
||||
|
||||
# Pipelines already loaded by get_nlp(), keyed by the same language codes as
# _MODEL_NAMES. Starts empty; get_nlp() adds an entry the first time a code is
# requested.
_nlp_cache: dict[str, Language] = {}
|
||||
|
||||
|
||||
def get_nlp(lang: str) -> Language:
    """Return the spaCy pipeline registered for ``lang``, loading it on demand.

    The pipeline is looked up in the module-level ``_nlp_cache``; on a miss it
    is loaded with ``spacy.load`` using the package name from ``_MODEL_NAMES``
    and stored in the cache before being returned.

    Args:
        lang: A language code that must be a key of ``_MODEL_NAMES``.

    Returns:
        The cached ``Language`` object for ``lang``.

    Raises:
        ValueError: If ``lang`` is not a key of ``_MODEL_NAMES``.
    """
    model_name = _MODEL_NAMES.get(lang)
    if model_name is None:
        raise ValueError(f"Unsupported language: {lang!r}. Valid: {list(_MODEL_NAMES)}")

    # Cache hit: hand back the already-loaded pipeline without touching spaCy.
    try:
        return _nlp_cache[lang]
    except KeyError:
        pass

    pipeline = spacy.load(model_name)
    _nlp_cache[lang] = pipeline
    return pipeline
|
||||
|
||||
|
||||
def load_all_models() -> None:
    """Call ``get_nlp`` once for every language code in ``_MODEL_NAMES``.

    The return values are discarded; the call is made for ``get_nlp``'s side
    effect of populating the pipeline cache.
    """
    for language_code in _MODEL_NAMES:
        get_nlp(language_code)
|
||||
@@ -31,3 +31,44 @@ def test_parse_response_serializes_nulls():
|
||||
assert data["dateFrom"] is None
|
||||
assert data["dateTo"] == "1920-12-31"
|
||||
assert data["personRole"] == "sender"
|
||||
|
||||
|
||||
# ── Model loading ────────────────────────────────────────────────────────────
|
||||
|
||||
@pytest.fixture(scope="session")
def nlp_de():
    """Session-scoped fixture providing the result of ``get_nlp("de")``."""
    from extractor import get_nlp

    german_pipeline = get_nlp("de")
    return german_pipeline
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def nlp_en():
    """Session-scoped fixture providing the result of ``get_nlp("en")``."""
    from extractor import get_nlp

    english_pipeline = get_nlp("en")
    return english_pipeline
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def nlp_es():
    """Session-scoped fixture providing the result of ``get_nlp("es")``."""
    from extractor import get_nlp

    spanish_pipeline = get_nlp("es")
    return spanish_pipeline
|
||||
|
||||
|
||||
def test_get_nlp_de_loads(nlp_de):
    """The pipeline returned for "de" is a German pipeline that processes text.

    Concrete properties are asserted because a bare not-None check passes for
    any object the pipeline call returns and so cannot detect a wrong model.
    """
    doc = nlp_de("Test")
    # Guards against the "de" code being mapped to a model of another language.
    assert nlp_de.lang == "de"
    # The processed document must carry the input text unchanged.
    assert doc.text == "Test"
|
||||
|
||||
|
||||
def test_get_nlp_en_loads(nlp_en):
    """The pipeline returned for "en" is an English pipeline that processes text.

    Concrete properties are asserted because a bare not-None check passes for
    any object the pipeline call returns and so cannot detect a wrong model.
    """
    doc = nlp_en("Test")
    # Guards against the "en" code being mapped to a model of another language.
    assert nlp_en.lang == "en"
    # The processed document must carry the input text unchanged.
    assert doc.text == "Test"
|
||||
|
||||
|
||||
def test_get_nlp_es_loads(nlp_es):
    """The pipeline returned for "es" is a Spanish pipeline that processes text.

    Concrete properties are asserted because a bare not-None check passes for
    any object the pipeline call returns and so cannot detect a wrong model.
    """
    doc = nlp_es("Prueba")
    # Guards against the "es" code being mapped to a model of another language.
    assert nlp_es.lang == "es"
    # The processed document must carry the input text unchanged.
    assert doc.text == "Prueba"
|
||||
|
||||
|
||||
def test_get_nlp_unknown_lang_raises():
    """``get_nlp("fr")`` raises ``ValueError`` matching "Unsupported language"."""
    from extractor import get_nlp

    unregistered_code = "fr"
    with pytest.raises(ValueError, match="Unsupported language"):
        get_nlp(unregistered_code)
|
||||
|
||||
Reference in New Issue
Block a user