Encapsulates every custom OCR metric in an OcrMetrics frozen dataclass and exposes a `build_metrics(registry)` factory. Production main.py binds against the default REGISTRY; tests construct a fresh CollectorRegistry per case and monkeypatch main.metrics, so counter values stay isolated between tests (decision #3 on issue #652, Option A). Refs #652 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
94 lines
3.5 KiB
Python
94 lines
3.5 KiB
Python
"""Tests for Prometheus metrics exposed by the OCR service.

Each test that asserts on a counter/gauge value uses a fresh CollectorRegistry
(see decision #3 on issue #652) to keep the metrics isolated between tests.
"""
|
|
|
|
from unittest.mock import AsyncMock, patch
|
|
|
|
import pytest
|
|
from httpx import ASGITransport, AsyncClient
|
|
from PIL import Image
|
|
from prometheus_client import CollectorRegistry
|
|
|
|
from main import app
|
|
from metrics import build_metrics
|
|
|
|
|
|
@pytest.fixture
def fresh_metrics(monkeypatch):
    """Swap `main.metrics` for a metrics container built on a brand-new registry.

    The replacement is installed through `monkeypatch`, so pytest undoes it
    when the requesting test finishes. The freshly built container is
    returned so the test can inspect it directly.
    """
    isolated = build_metrics(CollectorRegistry())
    monkeypatch.setattr("main.metrics", isolated)
    return isolated
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_metrics_endpoint_returns_200():
    """`GET /metrics` answers 200 and advertises a text/plain content type."""
    transport = ASGITransport(app=app)

    # Model and spell-checker loading are patched out for the duration of the call.
    with patch("main.kraken_engine.load_models"):
        with patch("main.load_spell_checker"):
            async with AsyncClient(transport=transport, base_url="http://test") as http:
                reply = await http.get("/metrics")

    assert reply.status_code == 200
    content_type = reply.headers.get("content-type", "")
    assert "text/plain" in content_type
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_metrics_includes_http_request_metrics_after_ocr_call():
    """After a request to /ocr, `/metrics` exposes auto-instrumented http_* metrics.

    The model/spell-checker loaders, the PDF download, page preprocessing and
    Surya block extraction are all patched, so the /ocr request is served from
    the canned `mock_images` / `mock_blocks` below.
    """
    # Local import: the readiness flag is toggled on the module object itself.
    import main as main_module

    mock_images = [Image.new("RGB", (100, 100))]
    mock_blocks = [{"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 1.0, "height": 1.0,
                    "polygon": None, "text": "hi", "words": []}]

    with patch("main.kraken_engine.load_models"), \
         patch("main.load_spell_checker"), \
         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
         patch("main.preprocess_page", side_effect=lambda img: img), \
         patch("main.surya_engine.extract_blocks", return_value=mock_blocks):
        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
            # Models need to be loaded for /ocr to accept requests; force the flag.
            # Remember the prior value so the test leaves the module as it found it
            # instead of unconditionally resetting the flag to False.
            previous_ready = getattr(main_module, "_models_ready", False)
            main_module._models_ready = True
            try:
                ocr_response = await client.post("/ocr", json={
                    "pdfUrl": "http://minio/doc.pdf",
                    "scriptType": "TYPEWRITER",
                    "language": "de",
                })
                assert ocr_response.status_code == 200, ocr_response.text

                metrics_response = await client.get("/metrics")
            finally:
                main_module._models_ready = previous_ready

    body = metrics_response.text
    assert "http_requests_total" in body
    assert "http_request_duration_seconds" in body
|
|
|
|
|
|
def test_build_metrics_registers_all_custom_metrics_on_given_registry():
    """`build_metrics` registers every custom OCR metric on the registry it is given."""
    target_registry = CollectorRegistry()
    first = build_metrics(target_registry)

    # Gather the family names the registry reports after the build.
    registered = set()
    for family in target_registry.collect():
        registered.add(family.name)

    required = {
        "ocr_jobs",
        "ocr_pages",
        "ocr_skipped_pages",
        "ocr_words",
        "ocr_illegible_words",
        "ocr_processing_seconds",
        "ocr_training_runs",
        "ocr_model_accuracy",
        "ocr_models_ready",
    }
    missing = required - registered
    assert not missing, f"missing: {missing}"

    # Building against another registry must hand back a distinct container.
    second = build_metrics(CollectorRegistry())
    assert first is not second
|