feat(ocr): add metrics.py factory with test-scoped CollectorRegistry support

Encapsulates every custom OCR metric in an OcrMetrics frozen dataclass and
exposes a `build_metrics(registry)` factory. Production main.py binds against
the default REGISTRY; tests construct a fresh CollectorRegistry per case and
monkeypatch main.metrics, so counter values stay isolated between tests
(decision #3 on issue #652, Option A).

Refs #652

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-21 16:02:20 +02:00
parent 4bb6685edb
commit f3e3545d06
3 changed files with 131 additions and 0 deletions

View File

@@ -20,9 +20,11 @@ import pypdfium2 as pdfium
from fastapi import FastAPI, Form, Header, HTTPException, UploadFile
from fastapi.responses import StreamingResponse
from PIL import Image
from prometheus_client import REGISTRY
from prometheus_fastapi_instrumentator import Instrumentator
from confidence import apply_confidence_markers, get_threshold
from metrics import OcrMetrics, build_metrics
from spell_check import correct_text, load_spell_checker
from engines import kraken as kraken_engine
from engines import surya as surya_engine
@@ -38,6 +40,8 @@ logger = logging.getLogger(__name__)
_models_ready = False
metrics: OcrMetrics = build_metrics(REGISTRY)
ALLOWED_PDF_HOSTS = set(
h.strip() for h in os.getenv("ALLOWED_PDF_HOSTS", "minio,localhost,127.0.0.1").split(",")
)