feat(nlp-service): spaCy model loading with get_nlp/load_all_models
This commit is contained in:
33
nlp-service/extractor.py
Normal file
33
nlp-service/extractor.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from datetime import date
|
||||
|
||||
import dateparser
|
||||
import spacy
|
||||
from spacy.language import Language
|
||||
|
||||
from models import ParseResponse
|
||||
|
||||
# ── Language model registry ──────────────────────────────────────────────────
|
||||
|
||||
# Maps each supported language code to the spaCy model package loaded for it.
# Insertion order is significant: it is the order shown in the "Valid: [...]"
# part of get_nlp's error message and the order load_all_models iterates in.
_MODEL_NAMES: dict[str, str] = {
    "de": "de_core_news_sm",
    "en": "en_core_web_sm",
    "es": "es_core_news_sm",
}

# Loaded pipelines keyed by language code; filled lazily by get_nlp so each
# model is loaded at most once per process.
_nlp_cache: dict[str, Language] = {}
|
||||
|
||||
|
||||
def get_nlp(lang: str) -> Language:
    """Return the spaCy pipeline registered for ``lang``.

    The pipeline is loaded with ``spacy.load`` the first time a language is
    requested and stored in ``_nlp_cache``; later calls for the same language
    return the stored object.

    Args:
        lang: Language code; must be a key of ``_MODEL_NAMES``.

    Returns:
        The cached pipeline for ``lang``.

    Raises:
        ValueError: If ``lang`` is not a key of ``_MODEL_NAMES``.
    """
    # Reject unknown codes before touching the cache or spaCy.
    if lang not in _MODEL_NAMES:
        raise ValueError(f"Unsupported language: {lang!r}. Valid: {list(_MODEL_NAMES)}")

    # Cache hit: hand back the pipeline loaded by an earlier call.
    if lang in _nlp_cache:
        return _nlp_cache[lang]

    # Cache miss: load once, remember, and return the same object.
    model_name = _MODEL_NAMES[lang]
    pipeline = spacy.load(model_name)
    _nlp_cache[lang] = pipeline
    return pipeline
|
||||
|
||||
|
||||
def load_all_models() -> None:
    """Load the pipeline for every language listed in ``_MODEL_NAMES``.

    Calls ``get_nlp`` once per registered language code, in registry order,
    discarding the return value; the call is made only for its loading and
    caching effect.
    """
    for language_code in _MODEL_NAMES:
        get_nlp(language_code)
|
||||
Reference in New Issue
Block a user