34 lines
881 B
Python
34 lines
881 B
Python
from __future__ import annotations
|
|
|
|
import re
|
|
from datetime import date
|
|
|
|
import dateparser
|
|
import spacy
|
|
from spacy.language import Language
|
|
|
|
from models import ParseResponse
|
|
|
|
# ── Language model registry ──────────────────────────────────────────────────
|
|
|
|
# Maps a language code to the name of the spaCy model that get_nlp() loads
# for it. Entry order is significant: it is the order shown in get_nlp()'s
# "Valid: [...]" error text and the order load_all_models() iterates in.
_MODEL_NAMES: dict[str, str] = {
    "de": "de_core_news_sm",
    "en": "en_core_web_sm",
    "es": "es_core_news_sm",
}
|
|
|
|
# Loaded pipelines keyed by language code. get_nlp() stores each pipeline here
# the first time it is requested and returns the stored object afterwards.
_nlp_cache: dict[str, Language] = {}
|
|
|
|
|
|
def get_nlp(lang: str) -> Language:
    """Return the spaCy pipeline registered for *lang*, loading it on first use.

    Args:
        lang: A language code that must be a key of ``_MODEL_NAMES``.

    Returns:
        The pipeline stored in ``_nlp_cache`` for *lang*. The same object is
        returned on every subsequent call for that code.

    Raises:
        ValueError: If *lang* is not a key of ``_MODEL_NAMES``.
    """
    # Reject unknown codes before touching the cache.
    if lang not in _MODEL_NAMES:
        raise ValueError(f"Unsupported language: {lang!r}. Valid: {list(_MODEL_NAMES)}")

    # Fast path: the pipeline was already loaded by an earlier call.
    if lang in _nlp_cache:
        return _nlp_cache[lang]

    model_name = _MODEL_NAMES[lang]
    pipeline = spacy.load(model_name)
    _nlp_cache[lang] = pipeline
    return pipeline
|
|
|
|
|
|
def load_all_models() -> None:
    """Call ``get_nlp`` once for every language code in ``_MODEL_NAMES``.

    After this returns, each registered code has an entry in ``_nlp_cache``,
    so later ``get_nlp`` calls for those codes return the cached pipeline.
    """
    for language_code in _MODEL_NAMES:
        get_nlp(language_code)
|