feat(ocr): expose Prometheus /metrics endpoint with OCR-domain counters #653

Merged
marcel merged 27 commits from feat/issue-652-ocr-metrics into main 2026-05-21 18:16:48 +02:00
3 changed files with 28 additions and 0 deletions
Showing only changes of commit 18c93d4eaa - Show all commits

View File

@@ -20,6 +20,7 @@ import pypdfium2 as pdfium
from fastapi import FastAPI, Form, Header, HTTPException, UploadFile from fastapi import FastAPI, Form, Header, HTTPException, UploadFile
from fastapi.responses import StreamingResponse from fastapi.responses import StreamingResponse
from PIL import Image from PIL import Image
from prometheus_fastapi_instrumentator import Instrumentator
from confidence import apply_confidence_markers, get_threshold from confidence import apply_confidence_markers, get_threshold
from spell_check import correct_text, load_spell_checker from spell_check import correct_text, load_spell_checker
@@ -72,6 +73,8 @@ async def lifespan(app: FastAPI):
app = FastAPI(title="Familienarchiv OCR Service", lifespan=lifespan) app = FastAPI(title="Familienarchiv OCR Service", lifespan=lifespan)
# Wire Prometheus instrumentation into the app at import time and expose the
# metrics endpoint on it. Requests to "/health" and "/metrics" are listed in
# `excluded_handlers` so they are left out of the instrumented HTTP metrics.
Instrumentator(excluded_handlers=["/health", "/metrics"]).instrument(app).expose(app)
@app.get("/health") @app.get("/health")
def health(): def health():

View File

@@ -10,3 +10,4 @@ pyvips>=2.2.0
httpx==0.28.1 httpx==0.28.1
pyspellchecker==0.9.0 pyspellchecker==0.9.0
opencv-python-headless==4.11.0.86 opencv-python-headless==4.11.0.86
prometheus-fastapi-instrumentator==7.0.0

View File

@@ -0,0 +1,24 @@
"""Tests for Prometheus metrics exposed by the OCR service.
Each test that asserts on a counter/gauge value uses a fresh CollectorRegistry
(see decision #3 on issue #652) to keep the metrics isolated between tests.
"""
from unittest.mock import patch
import pytest
from httpx import ASGITransport, AsyncClient
from main import app
@pytest.mark.asyncio
async def test_metrics_endpoint_returns_200():
    """Request `/metrics` in-process and expect a 200 plain-text reply."""
    # Swap the model and spell-checker loaders for mocks while the request runs.
    with patch("main.kraken_engine.load_models"):
        with patch("main.load_spell_checker"):
            in_process = ASGITransport(app=app)
            async with AsyncClient(transport=in_process, base_url="http://test") as http:
                reply = await http.get("/metrics")

    assert reply.status_code == 200
    # A missing header falls back to "" so the membership check fails cleanly.
    content_type = reply.headers.get("content-type", "")
    assert "text/plain" in content_type