- Wire _EXTRA_SPAN_STOPS into _extract_persons_and_role so German function words (im, seine, ihre, dem, …) terminate name spans — fixes "Clara im" and "seine Kinder" leaking into personNames
- Add _NON_NAME_TOKENS filter in PersonMatcher.load() to skip DB records whose first_name contains prepositions or possessives — filters 290 bad records (annotations like "an seine Eltern", "Eltern in", place references like "Enkel Cram aus Mexiko") that were causing exact Pass-2 matches
- Remove spaCy model downloads from Dockerfile (no longer needed after the DB-backed matcher rewrite)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
25 lines
558 B
Docker
25 lines
558 B
Docker
# Image for the service that uvicorn starts from main:app, listening on port 8001.
FROM python:3.11-slim

WORKDIR /app

# curl is required at runtime by the HEALTHCHECK below. The apt lists are
# removed in the same layer so they never end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Create the unprivileged runtime user before any files are copied and hand it
# the (still empty) working directory. Doing this up front keeps the layer
# cached across source changes and lets the COPY steps set ownership directly,
# instead of a later recursive chown that would duplicate every copied file in
# an extra layer.
RUN useradd --no-create-home --shell /usr/sbin/nologin --uid 1001 nlp \
    && chown nlp:nlp /app

# Copy the dependency manifest on its own so the pip layer is only rebuilt
# when requirements.txt changes, not on every source edit.
COPY --chown=nlp:nlp requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application source, owned by the runtime user from the moment it is copied.
COPY --chown=nlp:nlp . .

# Everything that needs root is done; drop privileges for runtime.
USER nlp

# Documentation only: the port uvicorn binds to in CMD.
EXPOSE 8001

# Probe the app's /health endpoint; curl -f turns HTTP error statuses into a
# non-zero exit so the container is reported unhealthy.
HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
    CMD curl -f http://localhost:8001/health || exit 1

# Exec form so uvicorn runs as PID 1 and receives stop signals directly.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8001"]