feat(ocr): expose Prometheus /metrics endpoint with OCR-domain counters #653

Merged
marcel merged 27 commits from feat/issue-652-ocr-metrics into main 2026-05-21 18:16:48 +02:00
Showing only changes of commit 4bb6685edb - Show all commits

View File

@@ -4,10 +4,11 @@ Each test that asserts on a counter/gauge value uses a fresh CollectorRegistry
(see decision #3 on issue #652) to keep the metrics isolated between tests.
"""
from unittest.mock import patch
from unittest.mock import AsyncMock, patch
import pytest
from httpx import ASGITransport, AsyncClient
from PIL import Image
from main import app
@@ -22,3 +23,36 @@ async def test_metrics_endpoint_returns_200():
assert response.status_code == 200
assert "text/plain" in response.headers.get("content-type", "")
@pytest.mark.asyncio
async def test_metrics_includes_http_request_metrics_after_ocr_call():
"""After a request to /ocr, `/metrics` exposes auto-instrumented http_* metrics."""
mock_images = [Image.new("RGB", (100, 100))]
mock_blocks = [{"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 1.0, "height": 1.0,
"polygon": None, "text": "hi", "words": []}]
with patch("main.kraken_engine.load_models"), \
patch("main.load_spell_checker"), \
patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
patch("main.preprocess_page", side_effect=lambda img: img), \
patch("main.surya_engine.extract_blocks", return_value=mock_blocks):
async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
# Models need to be loaded for /ocr to accept requests; force the flag.
import main as main_module
main_module._models_ready = True
try:
ocr_response = await client.post("/ocr", json={
"pdfUrl": "http://minio/doc.pdf",
"scriptType": "TYPEWRITER",
"language": "de",
})
assert ocr_response.status_code == 200, ocr_response.text
metrics_response = await client.get("/metrics")
finally:
main_module._models_ready = False
body = metrics_response.text
assert "http_requests_total" in body
assert "http_request_duration_seconds" in body