From 18c93d4eaa12c74b89953f47ef6387ff7729f8a0 Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 21 May 2026 15:59:37 +0200 Subject: [PATCH] feat(ocr): expose /metrics endpoint via prometheus-fastapi-instrumentator Mount the instrumentator immediately after FastAPI app creation, excluding /health and /metrics from request metrics to keep http_requests_total focused on real application traffic. Refs #652 Co-Authored-By: Claude Sonnet 4.6 --- ocr-service/main.py | 3 +++ ocr-service/requirements.txt | 1 + ocr-service/test_metrics.py | 24 ++++++++++++++++++++++++ 3 files changed, 28 insertions(+) create mode 100644 ocr-service/test_metrics.py diff --git a/ocr-service/main.py b/ocr-service/main.py index 409cc78f..63859eba 100644 --- a/ocr-service/main.py +++ b/ocr-service/main.py @@ -20,6 +20,7 @@ import pypdfium2 as pdfium from fastapi import FastAPI, Form, Header, HTTPException, UploadFile from fastapi.responses import StreamingResponse from PIL import Image +from prometheus_fastapi_instrumentator import Instrumentator from confidence import apply_confidence_markers, get_threshold from spell_check import correct_text, load_spell_checker @@ -72,6 +73,8 @@ async def lifespan(app: FastAPI): app = FastAPI(title="Familienarchiv OCR Service", lifespan=lifespan) +Instrumentator(excluded_handlers=["/health", "/metrics"]).instrument(app).expose(app) + @app.get("/health") def health(): diff --git a/ocr-service/requirements.txt b/ocr-service/requirements.txt index befaafcb..8a9bedec 100644 --- a/ocr-service/requirements.txt +++ b/ocr-service/requirements.txt @@ -10,3 +10,4 @@ pyvips>=2.2.0 httpx==0.28.1 pyspellchecker==0.9.0 opencv-python-headless==4.11.0.86 +prometheus-fastapi-instrumentator==7.0.0 diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py new file mode 100644 index 00000000..3c6bc037 --- /dev/null +++ b/ocr-service/test_metrics.py @@ -0,0 +1,24 @@ +"""Tests for Prometheus metrics exposed by the OCR service. 
+
+Each test that asserts on a counter/gauge value uses a fresh CollectorRegistry
+(see decision #3 on issue #652) to keep the metrics isolated between tests.
+"""
+
+from unittest.mock import patch
+
+import pytest
+from httpx import ASGITransport, AsyncClient
+
+from main import app  # module under test; main.py instruments and exposes /metrics at import time
+
+
+@pytest.mark.asyncio
+async def test_metrics_endpoint_returns_200():
+    """`GET /metrics` responds 200 with a `text/plain` (Prometheus exposition) content type."""
+    with patch("main.kraken_engine.load_models"), \
+            patch("main.load_spell_checker"):  # loaders mocked, presumably to skip startup work; TODO confirm
+        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+            response = await client.get("/metrics")  # dispatched to the ASGI app through ASGITransport
+
+    assert response.status_code == 200
+    assert "text/plain" in response.headers.get("content-type", "")  # substring match tolerates parameters