feat(ocr): add metrics.py factory with test-scoped CollectorRegistry support
Encapsulates every custom OCR metric in a frozen `OcrMetrics` dataclass and exposes a `build_metrics(registry)` factory. Production main.py binds against the default REGISTRY; tests construct a fresh CollectorRegistry per case and monkeypatch main.metrics, so counter values stay isolated between tests (decision #3 on issue #652, Option A).

Refs #652

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
92
ocr-service/metrics.py
Normal file
92
ocr-service/metrics.py
Normal file
@@ -0,0 +1,92 @@
|
||||
"""Prometheus metric definitions for the OCR service.
|
||||
|
||||
`build_metrics(registry)` returns a fresh `OcrMetrics` instance bound to the
|
||||
given `CollectorRegistry`. Production code calls it once at module load with
|
||||
the default `REGISTRY`; tests pass a per-test `CollectorRegistry()` to keep
|
||||
counter values isolated between cases (decision #3 on issue #652).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from prometheus_client import CollectorRegistry, Counter, Gauge, Histogram
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class OcrMetrics:
    """Frozen bundle of every custom Prometheus metric used by the OCR service.

    Each field refers to one `prometheus_client` collector. The dataclass is
    frozen, so a field cannot be pointed at a different collector once the
    container exists. Updating the collector behind a field (`.inc()`,
    `.observe()`, `.set()`) is unaffected by that and remains safe. When a
    fresh set of collectors is needed, obtain a new container from
    `build_metrics` instead of reassigning fields.
    """

    # Job, page and word counters.
    ocr_jobs_total: Counter
    ocr_pages_total: Counter
    ocr_skipped_pages_total: Counter
    ocr_words_total: Counter
    ocr_illegible_words_total: Counter

    # Processing-time distribution.
    ocr_processing_seconds: Histogram

    # Training-related metrics.
    ocr_training_runs_total: Counter
    ocr_model_accuracy: Gauge

    # Readiness flag.
    ocr_models_ready: Gauge
|
||||
|
||||
|
||||
def build_metrics(registry: CollectorRegistry) -> OcrMetrics:
    """Return a new `OcrMetrics` whose collectors all register on `registry`.

    Every metric is constructed here with `registry=registry`, so the
    resulting container is tied to exactly that `CollectorRegistry`.

    NOTE(review): calling this twice with the same registry presumably fails
    on the duplicate metric names — confirm against `prometheus_client`.
    """
    # Collectors are created in the same order as the `OcrMetrics` fields.
    jobs = Counter(
        "ocr_jobs_total",
        "Number of OCR jobs processed, labelled by engine and script type.",
        labelnames=["engine", "script_type"],
        registry=registry,
    )
    pages = Counter(
        "ocr_pages_total",
        "Number of pages successfully OCR'd, labelled by engine.",
        labelnames=["engine"],
        registry=registry,
    )
    skipped_pages = Counter(
        "ocr_skipped_pages_total",
        "Number of pages skipped because the OCR engine raised.",
        registry=registry,
    )
    words = Counter(
        "ocr_words_total",
        "Number of words recognized across all OCR blocks.",
        registry=registry,
    )
    illegible_words = Counter(
        "ocr_illegible_words_total",
        "Number of words below the confidence threshold "
        "(replaced with [unleserlich]).",
        registry=registry,
    )
    # No `buckets` argument is given, so the library's default buckets apply.
    # NOTE(review): confirm the defaults cover per-document durations.
    processing_seconds = Histogram(
        "ocr_processing_seconds",
        "OCR processing time per page (streaming) or per document (non-streaming).",
        labelnames=["engine"],
        registry=registry,
    )
    training_runs = Counter(
        "ocr_training_runs_total",
        "Number of training runs, labelled by kind (recognition|segmentation) "
        "and outcome (success|error).",
        labelnames=["kind", "outcome"],
        registry=registry,
    )
    model_accuracy = Gauge(
        "ocr_model_accuracy",
        "Latest model accuracy reported by a successful training run.",
        labelnames=["kind"],
        registry=registry,
    )
    models_ready = Gauge(
        "ocr_models_ready",
        "1 once the lifespan startup has finished loading models, 0 before.",
        registry=registry,
    )

    return OcrMetrics(
        ocr_jobs_total=jobs,
        ocr_pages_total=pages,
        ocr_skipped_pages_total=skipped_pages,
        ocr_words_total=words,
        ocr_illegible_words_total=illegible_words,
        ocr_processing_seconds=processing_seconds,
        ocr_training_runs_total=training_runs,
        ocr_model_accuracy=model_accuracy,
        ocr_models_ready=models_ready,
    )
|
||||
Reference in New Issue
Block a user