feat(nlp-service): spaCy model loading with get_nlp/load_all_models

This commit is contained in:
Marcel
2026-06-07 10:17:07 +02:00
parent e3b8e57746
commit 18f028e2dd
2 changed files with 74 additions and 0 deletions

33
nlp-service/extractor.py Normal file
View File

@@ -0,0 +1,33 @@
from __future__ import annotations
import re
from datetime import date
import dateparser
import spacy
from spacy.language import Language
from models import ParseResponse
# ── Language model registry ──────────────────────────────────────────────────
# Maps a language code to the name of the spaCy pipeline package loaded for it.
# Insertion order matters: it is the order load_all_models() iterates in and
# the order shown in get_nlp()'s "Unsupported language" error message.
_MODEL_NAMES: dict[str, str] = {
    "de": "de_core_news_sm",
    "en": "en_core_web_sm",
    "es": "es_core_news_sm",
}

# Already-loaded pipelines keyed by language code; filled on demand by get_nlp().
_nlp_cache: dict[str, Language] = {}
def get_nlp(lang: str) -> Language:
    """Return the spaCy pipeline registered for *lang*, loading it on first use.

    The pipeline name is looked up in ``_MODEL_NAMES``. The first request for a
    language calls ``spacy.load`` and stores the result in ``_nlp_cache``;
    later requests return the stored object.

    Args:
        lang: Language code; must be a key of ``_MODEL_NAMES``.

    Returns:
        The loaded (and now cached) pipeline for ``lang``.

    Raises:
        ValueError: If ``lang`` is not a key of ``_MODEL_NAMES``.
    """
    # Validate first so an unknown code never reaches the cache or spacy.load.
    model_name = _MODEL_NAMES.get(lang)
    if model_name is None:
        raise ValueError(
            f"Unsupported language: {lang!r}. Valid: {list(_MODEL_NAMES)}"
        )

    pipeline = _nlp_cache.get(lang)
    if pipeline is None:
        # Cache miss: load once and remember the pipeline for later calls.
        pipeline = spacy.load(model_name)
        _nlp_cache[lang] = pipeline
    return pipeline
def load_all_models() -> None:
    """Load the pipeline for every language registered in ``_MODEL_NAMES``.

    Calls ``get_nlp`` once per registry key, in registry order, and discards
    the return values. Any exception raised by ``get_nlp`` propagates to the
    caller.
    """
    for language_code in _MODEL_NAMES:
        get_nlp(language_code)