from __future__ import annotations

import re
from datetime import date

import dateparser
import spacy
from spacy.language import Language

from models import ParseResponse

# ── Language model registry ──────────────────────────────────────────────────

# Maps a language code to the name of the spaCy pipeline loaded for it.
_MODEL_NAMES: dict[str, str] = {
    "de": "de_core_news_sm",
    "en": "en_core_web_sm",
    "es": "es_core_news_sm",
}

# Pipelines already loaded by get_nlp(), keyed by language code.
_nlp_cache: dict[str, Language] = {}

# Entity labels that denote a person. The "news" pipelines registered above
# (de, es) tag people as "PER", while the English "web" pipeline uses the
# OntoNotes label "PERSON"; both must be accepted or English input yields
# no names at all.
_PERSON_LABELS: frozenset[str] = frozenset({"PER", "PERSON"})


def get_nlp(lang: str) -> Language:
    """Return the spaCy pipeline for *lang*, loading it on first use.

    The loaded pipeline is stored in ``_nlp_cache`` so that subsequent calls
    with the same language code return the same object without reloading.

    Args:
        lang: A language code that is a key of ``_MODEL_NAMES``.

    Returns:
        The pipeline registered for ``lang``.

    Raises:
        ValueError: If ``lang`` is not a key of ``_MODEL_NAMES``.
    """
    if lang not in _MODEL_NAMES:
        raise ValueError(f"Unsupported language: {lang!r}. Valid: {list(_MODEL_NAMES)}")
    if lang not in _nlp_cache:
        _nlp_cache[lang] = spacy.load(_MODEL_NAMES[lang])
    return _nlp_cache[lang]


def load_all_models() -> None:
    """Load every registered pipeline so later ``get_nlp`` calls hit the cache."""
    for lang in _MODEL_NAMES:
        get_nlp(lang)


# ── Step 1: Person name extraction ──────────────────────────────────────────

def extract_person_names(doc) -> list[str]:
    """Return the texts of person entities in the order ``doc.ents`` yields them.

    An entity counts as a person when its label is in ``_PERSON_LABELS``
    ("PER" or "PERSON"), so the result is correct for every pipeline in
    ``_MODEL_NAMES`` regardless of which label scheme it was trained with.

    Args:
        doc: An object exposing ``ents``, an iterable of entities that each
            have ``text`` and ``label_`` attributes (e.g. a processed spaCy
            document).

    Returns:
        A list of entity texts; empty when no person entity is present.
    """
    return [ent.text for ent in doc.ents if ent.label_ in _PERSON_LABELS]