Files
familienarchiv/nlp-service/extractor.py

34 lines
881 B
Python

from __future__ import annotations
import re
from datetime import date
import dateparser
import spacy
from spacy.language import Language
from models import ParseResponse
# ── Language model registry ──────────────────────────────────────────────────
# Maps each supported language code to the model name handed to spacy.load().
# Insertion order is visible to callers: get_nlp() lists these keys in its
# "Unsupported language" error message.
_MODEL_NAMES: dict[str, str] = {
    "de": "de_core_news_sm",
    "en": "en_core_web_sm",
    "es": "es_core_news_sm",
}

# Already-loaded pipelines keyed by language code; starts empty and is
# populated on demand by get_nlp().
_nlp_cache: dict[str, Language] = {}
def get_nlp(lang: str) -> Language:
    """Return the pipeline for *lang*, loading it on first request.

    The first call for a given language loads the model registered in
    ``_MODEL_NAMES`` via ``spacy.load`` and stores it in ``_nlp_cache``;
    later calls for the same language return the stored object.

    Args:
        lang: Language code; must be a key of ``_MODEL_NAMES``.

    Returns:
        The cached pipeline object for *lang*.

    Raises:
        ValueError: If *lang* is not a key of ``_MODEL_NAMES``.
    """
    model_name = _MODEL_NAMES.get(lang)
    if model_name is None:
        raise ValueError(f"Unsupported language: {lang!r}. Valid: {list(_MODEL_NAMES)}")

    # Cache hit: reuse the pipeline loaded by an earlier call.
    if lang in _nlp_cache:
        return _nlp_cache[lang]

    # Cache miss: load once and remember it for subsequent calls.
    pipeline = spacy.load(model_name)
    _nlp_cache[lang] = pipeline
    return pipeline
def load_all_models() -> None:
    """Call ``get_nlp`` once for every language code in ``_MODEL_NAMES``.

    After this returns, each registered language has an entry in the
    pipeline cache, so later ``get_nlp`` calls for those languages do not
    load anything. Any exception raised by ``get_nlp`` propagates to the
    caller and stops the loop.
    """
    for language_code in _MODEL_NAMES:
        get_nlp(language_code)