test(ocr): assert http_* metrics appear after an /ocr request

Locks down AC1: prometheus-fastapi-instrumentator must keep auto-exposing
http_requests_total and http_request_duration_seconds for application
traffic, not just register the /metrics endpoint.

Refs #652

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-21 16:00:33 +02:00
parent 18c93d4eaa
commit 4bb6685edb

View File

@@ -4,10 +4,11 @@ Each test that asserts on a counter/gauge value uses a fresh CollectorRegistry
(see decision #3 on issue #652) to keep the metrics isolated between tests.
"""
from unittest.mock import patch
from unittest.mock import AsyncMock, patch
import pytest
from httpx import ASGITransport, AsyncClient
from PIL import Image
from main import app
@@ -22,3 +23,36 @@ async def test_metrics_endpoint_returns_200():
assert response.status_code == 200
assert "text/plain" in response.headers.get("content-type", "")
@pytest.mark.asyncio
async def test_metrics_includes_http_request_metrics_after_ocr_call():
    """After a request to /ocr, `/metrics` exposes auto-instrumented http_* metrics.

    The PDF download, page preprocessing, block extraction and model/spell-checker
    loaders in ``main`` are replaced with mocks, so the test only exercises the
    HTTP layer: one POST to ``/ocr`` followed by a scrape of ``/metrics``. The
    scrape must mention both ``http_requests_total`` and
    ``http_request_duration_seconds``.
    """
    # `main` is already imported at module level (for `app`); this local import
    # just gives us a handle on the module object to toggle its readiness flag.
    import main as main_module

    mock_images = [Image.new("RGB", (100, 100))]
    mock_blocks = [{
        "pageNumber": 1,
        "x": 0.0,
        "y": 0.0,
        "width": 1.0,
        "height": 1.0,
        "polygon": None,
        "text": "hi",
        "words": [],
    }]

    with patch("main.kraken_engine.load_models"), \
         patch("main.load_spell_checker"), \
         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
         patch("main.preprocess_page", side_effect=lambda img: img), \
         patch("main.surya_engine.extract_blocks", return_value=mock_blocks):
        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
            # Models need to be loaded for /ocr to accept requests; force the flag.
            # Remember the previous value so the test restores exactly what it
            # found instead of unconditionally resetting the flag to False.
            previous_ready = getattr(main_module, "_models_ready", False)
            main_module._models_ready = True
            try:
                ocr_response = await client.post("/ocr", json={
                    "pdfUrl": "http://minio/doc.pdf",
                    "scriptType": "TYPEWRITER",
                    "language": "de",
                })
                assert ocr_response.status_code == 200, ocr_response.text
                metrics_response = await client.get("/metrics")
            finally:
                main_module._models_ready = previous_ready

    # Fail with the response body if the scrape itself did not succeed, rather
    # than with a misleading "substring not found" error below.
    assert metrics_response.status_code == 200, metrics_response.text
    body = metrics_response.text
    assert "http_requests_total" in body
    assert "http_request_duration_seconds" in body