feat(ocr): add metrics.py factory with test-scoped CollectorRegistry support
Encapsulates every custom OCR metric in an `OcrMetrics` frozen dataclass and exposes a `build_metrics(registry)` factory. Production `main.py` binds against the default `REGISTRY`; tests construct a fresh `CollectorRegistry` per case and monkeypatch `main.metrics`, so counter values stay isolated between tests (decision #3 on issue #652, Option A).

Refs #652
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -20,9 +20,11 @@ import pypdfium2 as pdfium
|
||||
from fastapi import FastAPI, Form, Header, HTTPException, UploadFile
|
||||
from fastapi.responses import StreamingResponse
|
||||
from PIL import Image
|
||||
from prometheus_client import REGISTRY
|
||||
from prometheus_fastapi_instrumentator import Instrumentator
|
||||
|
||||
from confidence import apply_confidence_markers, get_threshold
|
||||
from metrics import OcrMetrics, build_metrics
|
||||
from spell_check import correct_text, load_spell_checker
|
||||
from engines import kraken as kraken_engine
|
||||
from engines import surya as surya_engine
|
||||
@@ -38,6 +40,8 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
# Module-level readiness flag; it starts out False at import time.
_models_ready = False

# Process-wide OCR metrics container, registered on prometheus_client's
# default REGISTRY. Tests replace this attribute (monkeypatching
# `main.metrics`) with a container bound to a throwaway registry.
metrics: OcrMetrics = build_metrics(registry=REGISTRY)
|
||||
|
||||
# Host allowlist, parsed once at import time from the comma-separated
# ALLOWED_PDF_HOSTS environment variable (defaults to the three hosts below).
# NOTE(review): presumably consulted before fetching a PDF by URL — the
# consumer is outside this view.
#
# Blank entries are dropped: an empty variable, a trailing comma or a doubled
# comma would otherwise put "" into the allowlist, which could match a URL
# whose host component is empty.
ALLOWED_PDF_HOSTS = {
    host
    for host in (
        part.strip()
        for part in os.getenv("ALLOWED_PDF_HOSTS", "minio,localhost,127.0.0.1").split(",")
    )
    if host
}
|
||||
|
||||
92
ocr-service/metrics.py
Normal file
92
ocr-service/metrics.py
Normal file
@@ -0,0 +1,92 @@
|
||||
"""Prometheus metric definitions for the OCR service.
|
||||
|
||||
`build_metrics(registry)` returns a fresh `OcrMetrics` instance bound to the
|
||||
given `CollectorRegistry`. Production code calls it once at module load with
|
||||
the default `REGISTRY`; tests pass a per-test `CollectorRegistry()` to keep
|
||||
counter values isolated between cases (decision #3 on issue #652).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from prometheus_client import CollectorRegistry, Counter, Gauge, Histogram
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class OcrMetrics:
    """Bundle of all custom Prometheus collectors used by the OCR service.

    Each field holds a reference to a `prometheus_client` collector
    (counter, histogram or gauge). The dataclass is frozen, so the fields
    cannot be reassigned after construction; the collectors themselves are
    still updated in the usual way (`.inc()`, `.observe()`, `.set()`).
    To obtain a different set of collectors, call `build_metrics` again
    rather than trying to swap a field.
    """

    # Counters
    ocr_jobs_total: Counter
    ocr_pages_total: Counter
    ocr_skipped_pages_total: Counter
    ocr_words_total: Counter
    ocr_illegible_words_total: Counter

    # Processing-time histogram
    ocr_processing_seconds: Histogram

    # Training-related collectors
    ocr_training_runs_total: Counter
    ocr_model_accuracy: Gauge

    # Startup readiness gauge
    ocr_models_ready: Gauge
|
||||
|
||||
|
||||
def build_metrics(registry: CollectorRegistry) -> OcrMetrics:
    """Register the OCR collectors on `registry` and return them as a bundle.

    Every collector below is created with `registry=registry`, so nothing is
    attached to any other registry. The collectors are created in the same
    order as the fields of `OcrMetrics`.

    Args:
        registry: The `CollectorRegistry` the new collectors are registered on.

    Returns:
        A new `OcrMetrics` holding the freshly created collectors.

    NOTE(review): prometheus_client rejects a second collector with the same
    name on one registry, so this is expected to be called at most once per
    registry — confirm against the library documentation.
    """
    jobs = Counter(
        "ocr_jobs_total",
        "Number of OCR jobs processed, labelled by engine and script type.",
        ["engine", "script_type"],
        registry=registry,
    )
    pages = Counter(
        "ocr_pages_total",
        "Number of pages successfully OCR'd, labelled by engine.",
        ["engine"],
        registry=registry,
    )
    skipped_pages = Counter(
        "ocr_skipped_pages_total",
        "Number of pages skipped because the OCR engine raised.",
        registry=registry,
    )
    words = Counter(
        "ocr_words_total",
        "Number of words recognized across all OCR blocks.",
        registry=registry,
    )
    illegible_words = Counter(
        "ocr_illegible_words_total",
        "Number of words below the confidence threshold "
        "(replaced with [unleserlich]).",
        registry=registry,
    )
    # NOTE(review): no `buckets=` is passed, so the library's default bucket
    # boundaries apply — confirm they cover per-document OCR durations.
    processing_seconds = Histogram(
        "ocr_processing_seconds",
        "OCR processing time per page (streaming) or per document (non-streaming).",
        ["engine"],
        registry=registry,
    )
    training_runs = Counter(
        "ocr_training_runs_total",
        "Number of training runs, labelled by kind (recognition|segmentation) "
        "and outcome (success|error).",
        ["kind", "outcome"],
        registry=registry,
    )
    model_accuracy = Gauge(
        "ocr_model_accuracy",
        "Latest model accuracy reported by a successful training run.",
        ["kind"],
        registry=registry,
    )
    models_ready = Gauge(
        "ocr_models_ready",
        "1 once the lifespan startup has finished loading models, 0 before.",
        registry=registry,
    )

    return OcrMetrics(
        ocr_jobs_total=jobs,
        ocr_pages_total=pages,
        ocr_skipped_pages_total=skipped_pages,
        ocr_words_total=words,
        ocr_illegible_words_total=illegible_words,
        ocr_processing_seconds=processing_seconds,
        ocr_training_runs_total=training_runs,
        ocr_model_accuracy=model_accuracy,
        ocr_models_ready=models_ready,
    )
|
||||
@@ -9,8 +9,19 @@ from unittest.mock import AsyncMock, patch
|
||||
import pytest
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
from PIL import Image
|
||||
from prometheus_client import CollectorRegistry
|
||||
|
||||
from main import app
|
||||
from metrics import build_metrics
|
||||
|
||||
|
||||
@pytest.fixture
def fresh_metrics(monkeypatch):
    """Swap `main.metrics` for a container built on a brand-new registry.

    The replacement is installed through `monkeypatch`, and the new
    container is returned so the test can inspect its collectors.
    """
    isolated = build_metrics(CollectorRegistry())
    monkeypatch.setattr("main.metrics", isolated)
    return isolated
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -56,3 +67,27 @@ async def test_metrics_includes_http_request_metrics_after_ocr_call():
|
||||
body = metrics_response.text
|
||||
assert "http_requests_total" in body
|
||||
assert "http_request_duration_seconds" in body
|
||||
|
||||
|
||||
def test_build_metrics_registers_all_custom_metrics_on_given_registry():
    """All custom OCR metrics show up on the registry given to `build_metrics`."""
    registry = CollectorRegistry()
    metrics = build_metrics(registry)

    # Names as reported by `registry.collect()`; note the counters are
    # declared with a `_total` suffix that is absent from these names.
    expected = {
        "ocr_jobs",
        "ocr_pages",
        "ocr_skipped_pages",
        "ocr_words",
        "ocr_illegible_words",
        "ocr_processing_seconds",
        "ocr_training_runs",
        "ocr_model_accuracy",
        "ocr_models_ready",
    }

    metric_names = set()
    for family in registry.collect():
        metric_names.add(family.name)

    assert expected.issubset(metric_names), f"missing: {expected - metric_names}"

    # Building against another registry must hand back a distinct container.
    other_metrics = build_metrics(CollectorRegistry())
    assert other_metrics is not metrics
|
||||
|
||||
Reference in New Issue
Block a user