feat(nlp-service): wire NLP_FUZZY_THRESHOLD env var with 0-100 validation
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -9,10 +9,23 @@ from fastapi import FastAPI, HTTPException
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from extractor import extract, get_person_matcher, set_person_matcher
|
||||
from extractor import extract, get_person_matcher, set_fuzzy_threshold, set_person_matcher
|
||||
from models import ParseRequest, ParseResponse
|
||||
from person_matcher import PersonMatcher
|
||||
|
||||
# Threshold used when the NLP_FUZZY_THRESHOLD environment variable is not set;
# must itself satisfy the 0-100 range enforced by _parse_fuzzy_threshold.
_DEFAULT_FUZZY_THRESHOLD = 80
|
||||
|
||||
|
||||
def _parse_fuzzy_threshold(val: str) -> int:
|
||||
"""Parse and validate NLP_FUZZY_THRESHOLD — must be integer in [0, 100]."""
|
||||
try:
|
||||
n = int(val)
|
||||
except ValueError:
|
||||
raise ValueError(f"NLP_FUZZY_THRESHOLD must be an integer, got: {val!r}")
|
||||
if not (0 <= n <= 100):
|
||||
raise ValueError(f"NLP_FUZZY_THRESHOLD must be between 0 and 100, got: {n}")
|
||||
return n
|
||||
|
||||
|
||||
def _load_persons_from_db(db_url: str) -> list[tuple[str | None, str | None]]:
|
||||
import psycopg2 # deferred — not available in test environments without a DB
|
||||
@@ -28,6 +41,10 @@ def _load_persons_from_db(db_url: str) -> list[tuple[str | None, str | None]]:
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
threshold_raw = os.environ.get("NLP_FUZZY_THRESHOLD", str(_DEFAULT_FUZZY_THRESHOLD))
|
||||
threshold = _parse_fuzzy_threshold(threshold_raw)
|
||||
set_fuzzy_threshold(threshold)
|
||||
|
||||
# Only initialise the matcher when nothing was pre-seeded (e.g., by tests).
|
||||
if get_person_matcher() is None:
|
||||
m = PersonMatcher()
|
||||
|
||||
Reference in New Issue
Block a user