feat(ocr): expose Prometheus /metrics endpoint with OCR-domain counters #653

Merged
marcel merged 27 commits from feat/issue-652-ocr-metrics into main 2026-05-21 18:16:48 +02:00
3 changed files with 28 additions and 0 deletions
Showing only changes of commit 18c93d4eaa - Show all commits

View File

@@ -20,6 +20,7 @@ import pypdfium2 as pdfium
from fastapi import FastAPI, Form, Header, HTTPException, UploadFile
from fastapi.responses import StreamingResponse
from PIL import Image
from prometheus_fastapi_instrumentator import Instrumentator
from confidence import apply_confidence_markers, get_threshold
from spell_check import correct_text, load_spell_checker
@@ -72,6 +73,8 @@ async def lifespan(app: FastAPI):
# ASGI application object for the OCR service; `lifespan` is registered as the
# app's lifespan handler.
app = FastAPI(title="Familienarchiv OCR Service", lifespan=lifespan)
# Attach Prometheus instrumentation to the app and expose the metrics endpoint
# on it (the test suite requests it at GET /metrics). "/health" and "/metrics"
# are passed as excluded handlers — presumably so probe and scrape traffic is
# left out of the HTTP request metrics; confirm against the
# prometheus-fastapi-instrumentator documentation.
Instrumentator(excluded_handlers=["/health", "/metrics"]).instrument(app).expose(app)
@app.get("/health")
def health():

View File

@@ -10,3 +10,4 @@ pyvips>=2.2.0
httpx==0.28.1
pyspellchecker==0.9.0
opencv-python-headless==4.11.0.86
prometheus-fastapi-instrumentator==7.0.0

View File

@@ -0,0 +1,24 @@
"""Tests for Prometheus metrics exposed by the OCR service.
Each test that asserts on a counter/gauge value uses a fresh CollectorRegistry
(see decision #3 on issue #652) to keep the metrics isolated between tests.
"""
from unittest.mock import patch
import pytest
from httpx import ASGITransport, AsyncClient
from main import app
@pytest.mark.asyncio
async def test_metrics_endpoint_returns_200():
    """Check that `GET /metrics` answers 200 with a text/plain content type.

    `main.kraken_engine.load_models` and `main.load_spell_checker` are replaced
    by mocks while the request is made, so neither real loader is invoked.
    """
    with patch("main.kraken_engine.load_models"):
        with patch("main.load_spell_checker"):
            # Talk to the FastAPI app in-process through httpx's ASGI transport.
            async with AsyncClient(
                transport=ASGITransport(app=app),
                base_url="http://test",
            ) as http_client:
                resp = await http_client.get("/metrics")
                assert resp.status_code == 200
                assert "text/plain" in resp.headers.get("content-type", "")