From 18c93d4eaa12c74b89953f47ef6387ff7729f8a0 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 15:59:37 +0200
Subject: [PATCH 01/27] feat(ocr): expose /metrics endpoint via
 prometheus-fastapi-instrumentator

Mount the instrumentator immediately after FastAPI app creation, excluding
/health and /metrics from request metrics to keep http_requests_total focused
on real application traffic.

Refs #652

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/main.py          |  3 +++
 ocr-service/requirements.txt |  1 +
 ocr-service/test_metrics.py  | 24 ++++++++++++++++++++++++
 3 files changed, 28 insertions(+)
 create mode 100644 ocr-service/test_metrics.py

diff --git a/ocr-service/main.py b/ocr-service/main.py
index 409cc78f..63859eba 100644
--- a/ocr-service/main.py
+++ b/ocr-service/main.py
@@ -20,6 +20,7 @@ import pypdfium2 as pdfium
 from fastapi import FastAPI, Form, Header, HTTPException, UploadFile
 from fastapi.responses import StreamingResponse
 from PIL import Image
+from prometheus_fastapi_instrumentator import Instrumentator
 
 from confidence import apply_confidence_markers, get_threshold
 from spell_check import correct_text, load_spell_checker
@@ -72,6 +73,8 @@ async def lifespan(app: FastAPI):
 
 app = FastAPI(title="Familienarchiv OCR Service", lifespan=lifespan)
 
+Instrumentator(excluded_handlers=["/health", "/metrics"]).instrument(app).expose(app)
+
 
 @app.get("/health")
 def health():
diff --git a/ocr-service/requirements.txt b/ocr-service/requirements.txt
index befaafcb..8a9bedec 100644
--- a/ocr-service/requirements.txt
+++ b/ocr-service/requirements.txt
@@ -10,3 +10,4 @@ pyvips>=2.2.0
 httpx==0.28.1
 pyspellchecker==0.9.0
 opencv-python-headless==4.11.0.86
+prometheus-fastapi-instrumentator==7.0.0
diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
new file mode 100644
index 00000000..3c6bc037
--- /dev/null
+++ b/ocr-service/test_metrics.py
@@ -0,0 +1,24 @@
+"""Tests for Prometheus metrics exposed by the OCR service.
+
+Each test that asserts on a counter/gauge value uses a fresh CollectorRegistry
+(see decision #3 on issue #652) to keep the metrics isolated between tests.
+"""
+
+from unittest.mock import patch
+
+import pytest
+from httpx import ASGITransport, AsyncClient
+
+from main import app
+
+
+@pytest.mark.asyncio
+async def test_metrics_endpoint_returns_200():
+    """`GET /metrics` returns 200 with Prometheus exposition content."""
+    with patch("main.kraken_engine.load_models"), \
+         patch("main.load_spell_checker"):
+        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+            response = await client.get("/metrics")
+
+    assert response.status_code == 200
+    assert "text/plain" in response.headers.get("content-type", "")
-- 
2.49.1


From 4bb6685edb5d82f0f53c3dcfb267f8d8b911e287 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:00:33 +0200
Subject: [PATCH 02/27] test(ocr): assert http_* metrics appear after an /ocr
 request

Locks down AC1: prometheus-fastapi-instrumentator must keep auto-exposing
http_requests_total and http_request_duration_seconds for application
traffic, not just register the /metrics endpoint.

Refs #652

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/test_metrics.py | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index 3c6bc037..442a2d0e 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -4,10 +4,11 @@ Each test that asserts on a counter/gauge value uses a fresh CollectorRegistry
 (see decision #3 on issue #652) to keep the metrics isolated between tests.
 """
 
-from unittest.mock import patch
+from unittest.mock import AsyncMock, patch
 
 import pytest
 from httpx import ASGITransport, AsyncClient
+from PIL import Image
 
 from main import app
 
@@ -22,3 +23,36 @@ async def test_metrics_endpoint_returns_200():
 
     assert response.status_code == 200
     assert "text/plain" in response.headers.get("content-type", "")
+
+
+@pytest.mark.asyncio
+async def test_metrics_includes_http_request_metrics_after_ocr_call():
+    """After a request to /ocr, `/metrics` exposes auto-instrumented http_* metrics."""
+    mock_images = [Image.new("RGB", (100, 100))]
+    mock_blocks = [{"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 1.0, "height": 1.0,
+                    "polygon": None, "text": "hi", "words": []}]
+
+    with patch("main.kraken_engine.load_models"), \
+         patch("main.load_spell_checker"), \
+         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
+         patch("main.preprocess_page", side_effect=lambda img: img), \
+         patch("main.surya_engine.extract_blocks", return_value=mock_blocks):
+        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+            # Models need to be loaded for /ocr to accept requests; force the flag.
+            import main as main_module
+            main_module._models_ready = True
+            try:
+                ocr_response = await client.post("/ocr", json={
+                    "pdfUrl": "http://minio/doc.pdf",
+                    "scriptType": "TYPEWRITER",
+                    "language": "de",
+                })
+                assert ocr_response.status_code == 200, ocr_response.text
+
+                metrics_response = await client.get("/metrics")
+            finally:
+                main_module._models_ready = False
+
+    body = metrics_response.text
+    assert "http_requests_total" in body
+    assert "http_request_duration_seconds" in body
-- 
2.49.1


From f3e3545d0682e3b19b61a8f80ea3a00f6cbb72e1 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:02:20 +0200
Subject: [PATCH 03/27] feat(ocr): add metrics.py factory with test-scoped
 CollectorRegistry support

Encapsulates every custom OCR metric in an OcrMetrics frozen dataclass and
exposes a `build_metrics(registry)` factory. Production main.py binds against
the default REGISTRY; tests construct a fresh CollectorRegistry per case and
monkeypatch main.metrics, so counter values stay isolated between tests
(decision #3 on issue #652, Option A).

Refs #652

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/main.py         |  4 ++
 ocr-service/metrics.py      | 92 +++++++++++++++++++++++++++++++++++++
 ocr-service/test_metrics.py | 35 ++++++++++++++
 3 files changed, 131 insertions(+)
 create mode 100644 ocr-service/metrics.py

diff --git a/ocr-service/main.py b/ocr-service/main.py
index 63859eba..0d068d15 100644
--- a/ocr-service/main.py
+++ b/ocr-service/main.py
@@ -20,9 +20,11 @@ import pypdfium2 as pdfium
 from fastapi import FastAPI, Form, Header, HTTPException, UploadFile
 from fastapi.responses import StreamingResponse
 from PIL import Image
+from prometheus_client import REGISTRY
 from prometheus_fastapi_instrumentator import Instrumentator
 
 from confidence import apply_confidence_markers, get_threshold
+from metrics import OcrMetrics, build_metrics
 from spell_check import correct_text, load_spell_checker
 from engines import kraken as kraken_engine
 from engines import surya as surya_engine
@@ -38,6 +40,8 @@ logger = logging.getLogger(__name__)
 
 _models_ready = False
 
+metrics: OcrMetrics = build_metrics(REGISTRY)
+
 ALLOWED_PDF_HOSTS = set(
     h.strip() for h in os.getenv("ALLOWED_PDF_HOSTS", "minio,localhost,127.0.0.1").split(",")
 )
diff --git a/ocr-service/metrics.py b/ocr-service/metrics.py
new file mode 100644
index 00000000..66efb9f2
--- /dev/null
+++ b/ocr-service/metrics.py
@@ -0,0 +1,92 @@
+"""Prometheus metric definitions for the OCR service.
+
+`build_metrics(registry)` returns a fresh `OcrMetrics` instance bound to the
+given `CollectorRegistry`. Production code calls it once at module load with
+the default `REGISTRY`; tests pass a per-test `CollectorRegistry()` to keep
+counter values isolated between cases (decision #3 on issue #652).
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from prometheus_client import CollectorRegistry, Counter, Gauge, Histogram
+
+
+@dataclass(frozen=True)
+class OcrMetrics:
+    """Container for every custom OCR metric.
+
+    Counters and gauges are immutable references to `prometheus_client`
+    instances. Mutating them (`.inc()`, `.observe()`, `.set()`) is safe;
+    rebinding the field on the dataclass is not — use `build_metrics` to get
+    a new container.
+    """
+
+    ocr_jobs_total: Counter
+    ocr_pages_total: Counter
+    ocr_skipped_pages_total: Counter
+    ocr_words_total: Counter
+    ocr_illegible_words_total: Counter
+    ocr_processing_seconds: Histogram
+    ocr_training_runs_total: Counter
+    ocr_model_accuracy: Gauge
+    ocr_models_ready: Gauge
+
+
+def build_metrics(registry: CollectorRegistry) -> OcrMetrics:
+    """Create one OcrMetrics instance bound to `registry`."""
+    return OcrMetrics(
+        ocr_jobs_total=Counter(
+            "ocr_jobs_total",
+            "Number of OCR jobs processed, labelled by engine and script type.",
+            ["engine", "script_type"],
+            registry=registry,
+        ),
+        ocr_pages_total=Counter(
+            "ocr_pages_total",
+            "Number of pages successfully OCR'd, labelled by engine.",
+            ["engine"],
+            registry=registry,
+        ),
+        ocr_skipped_pages_total=Counter(
+            "ocr_skipped_pages_total",
+            "Number of pages skipped because the OCR engine raised.",
+            registry=registry,
+        ),
+        ocr_words_total=Counter(
+            "ocr_words_total",
+            "Number of words recognized across all OCR blocks.",
+            registry=registry,
+        ),
+        ocr_illegible_words_total=Counter(
+            "ocr_illegible_words_total",
+            "Number of words below the confidence threshold "
+            "(replaced with [unleserlich]).",
+            registry=registry,
+        ),
+        ocr_processing_seconds=Histogram(
+            "ocr_processing_seconds",
+            "OCR processing time per page (streaming) or per document (non-streaming).",
+            ["engine"],
+            registry=registry,
+        ),
+        ocr_training_runs_total=Counter(
+            "ocr_training_runs_total",
+            "Number of training runs, labelled by kind (recognition|segmentation) "
+            "and outcome (success|error).",
+            ["kind", "outcome"],
+            registry=registry,
+        ),
+        ocr_model_accuracy=Gauge(
+            "ocr_model_accuracy",
+            "Latest model accuracy reported by a successful training run.",
+            ["kind"],
+            registry=registry,
+        ),
+        ocr_models_ready=Gauge(
+            "ocr_models_ready",
+            "1 once the lifespan startup has finished loading models, 0 before.",
+            registry=registry,
+        ),
+    )
diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index 442a2d0e..0906beec 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -9,8 +9,19 @@ from unittest.mock import AsyncMock, patch
 import pytest
 from httpx import ASGITransport, AsyncClient
 from PIL import Image
+from prometheus_client import CollectorRegistry
 
 from main import app
+from metrics import build_metrics
+
+
+@pytest.fixture
+def fresh_metrics(monkeypatch):
+    """Replace the module-level `main.metrics` with one bound to a fresh registry."""
+    registry = CollectorRegistry()
+    test_metrics = build_metrics(registry)
+    monkeypatch.setattr("main.metrics", test_metrics)
+    return test_metrics
 
 
 @pytest.mark.asyncio
@@ -56,3 +67,27 @@ async def test_metrics_includes_http_request_metrics_after_ocr_call():
     body = metrics_response.text
     assert "http_requests_total" in body
     assert "http_request_duration_seconds" in body
+
+
+def test_build_metrics_registers_all_custom_metrics_on_given_registry():
+    """`build_metrics` returns an OcrMetrics bound to the supplied registry."""
+    registry = CollectorRegistry()
+    metrics = build_metrics(registry)
+
+    metric_names = {m.name for m in registry.collect()}
+    expected = {
+        "ocr_jobs",
+        "ocr_pages",
+        "ocr_skipped_pages",
+        "ocr_words",
+        "ocr_illegible_words",
+        "ocr_processing_seconds",
+        "ocr_training_runs",
+        "ocr_model_accuracy",
+        "ocr_models_ready",
+    }
+    assert expected <= metric_names, f"missing: {expected - metric_names}"
+
+    # A second registry yields a separate container — no shared state.
+    other_metrics = build_metrics(CollectorRegistry())
+    assert metrics is not other_metrics
-- 
2.49.1


From 696b71da5abe63915f823ac1a58740c51b89c79e Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:03:37 +0200
Subject: [PATCH 04/27] feat(ocr): increment ocr_jobs_total with engine and
 script_type labels

Pick engine="kraken" for HANDWRITING_KURRENT, engine="surya" otherwise,
then increment after the blocks have been extracted.

Refs #652 (AC2)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/main.py         |  3 +++
 ocr-service/test_metrics.py | 38 +++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/ocr-service/main.py b/ocr-service/main.py
index 0d068d15..83a092ee 100644
--- a/ocr-service/main.py
+++ b/ocr-service/main.py
@@ -106,6 +106,7 @@ async def run_ocr(request: OcrRequest):
         del img
 
     script_type = request.scriptType.upper()
+    engine_name = "kraken" if script_type == "HANDWRITING_KURRENT" else "surya"
 
     if script_type == "HANDWRITING_KURRENT":
         if not kraken_engine.is_available():
@@ -119,6 +120,8 @@ async def run_ocr(request: OcrRequest):
         # TYPEWRITER, HANDWRITING_LATIN, UNKNOWN — all use Surya
         blocks = await asyncio.to_thread(surya_engine.extract_blocks, images, request.language)
 
+    metrics.ocr_jobs_total.labels(engine=engine_name, script_type=script_type).inc()
+
     threshold = get_threshold(script_type)
     for block in blocks:
         if block.get("words"):
diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index 0906beec..2f7b0db8 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -91,3 +91,41 @@ def test_build_metrics_registers_all_custom_metrics_on_given_registry():
     # A second registry yields a separate container — no shared state.
     other_metrics = build_metrics(CollectorRegistry())
     assert metrics is not other_metrics
+
+
+async def _drive_ocr(client: AsyncClient, *, script_type: str) -> None:
+    """Helper — fires /ocr with a single mocked page and asserts a 200."""
+    response = await client.post("/ocr", json={
+        "pdfUrl": "http://minio/doc.pdf",
+        "scriptType": script_type,
+        "language": "de",
+    })
+    assert response.status_code == 200, response.text
+
+
+@pytest.mark.asyncio
+async def test_ocr_jobs_total_incremented_with_kraken_engine_label_for_kurrent(fresh_metrics):
+    """A /ocr call with HANDWRITING_KURRENT increments engine=kraken."""
+    mock_images = [Image.new("RGB", (100, 100))]
+    mock_blocks = [{"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 1.0, "height": 1.0,
+                    "polygon": None, "text": "hi", "words": []}]
+
+    with patch("main.kraken_engine.load_models"), \
+         patch("main.load_spell_checker"), \
+         patch("main.correct_text", side_effect=lambda t: t), \
+         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
+         patch("main.preprocess_page", side_effect=lambda img: img), \
+         patch("main.kraken_engine.is_available", return_value=True), \
+         patch("main.kraken_engine.extract_blocks", return_value=mock_blocks):
+        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+            import main as main_module
+            main_module._models_ready = True
+            try:
+                await _drive_ocr(client, script_type="HANDWRITING_KURRENT")
+            finally:
+                main_module._models_ready = False
+
+    value = fresh_metrics.ocr_jobs_total.labels(
+        engine="kraken", script_type="HANDWRITING_KURRENT"
+    )._value.get()
+    assert value == 1.0
-- 
2.49.1


From 52d8dc2b20fd33612ea6afa1aa0179a78a5e34a0 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:04:20 +0200
Subject: [PATCH 05/27] test(ocr): assert ocr_jobs_total label is engine=surya
 for typewriter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Locks down AC2 for the non-Kurrent path. The same code branch in /ocr that
sets engine_name from script_type now has explicit coverage for both
HANDWRITING_KURRENT → kraken and TYPEWRITER → surya.

Refs #652 (AC2)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/test_metrics.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index 2f7b0db8..f034a31c 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -129,3 +129,29 @@ async def test_ocr_jobs_total_incremented_with_kraken_engine_label_for_kurrent(f
         engine="kraken", script_type="HANDWRITING_KURRENT"
     )._value.get()
     assert value == 1.0
+
+
+@pytest.mark.asyncio
+async def test_ocr_jobs_total_incremented_with_surya_engine_label_for_typewriter(fresh_metrics):
+    """A /ocr call with TYPEWRITER increments engine=surya."""
+    mock_images = [Image.new("RGB", (100, 100))]
+    mock_blocks = [{"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 1.0, "height": 1.0,
+                    "polygon": None, "text": "hi", "words": []}]
+
+    with patch("main.kraken_engine.load_models"), \
+         patch("main.load_spell_checker"), \
+         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
+         patch("main.preprocess_page", side_effect=lambda img: img), \
+         patch("main.surya_engine.extract_blocks", return_value=mock_blocks):
+        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+            import main as main_module
+            main_module._models_ready = True
+            try:
+                await _drive_ocr(client, script_type="TYPEWRITER")
+            finally:
+                main_module._models_ready = False
+
+    value = fresh_metrics.ocr_jobs_total.labels(
+        engine="surya", script_type="TYPEWRITER"
+    )._value.get()
+    assert value == 1.0
-- 
2.49.1


From 79edb9455810dcfad65fb5bea05a690443ec2995 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:05:36 +0200
Subject: [PATCH 06/27] feat(ocr): increment ocr_pages_total per successful
 page in stream

Bumps the counter inside both the standard and guided /ocr/stream
generators after a page yields its blocks, before the per-page json line is
emitted. Also moves the ocr_jobs_total increment for /ocr/stream right after
engine selection so the counter still fires when a page later errors out.

Refs #652 (AC3a)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/main.py         |  5 +++++
 ocr-service/test_metrics.py | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/ocr-service/main.py b/ocr-service/main.py
index 83a092ee..08990e24 100644
--- a/ocr-service/main.py
+++ b/ocr-service/main.py
@@ -156,6 +156,9 @@ async def run_ocr_stream(request: OcrRequest):
         )
 
     engine = kraken_engine if use_kraken else surya_engine
+    engine_name = "kraken" if use_kraken else "surya"
+
+    metrics.ocr_jobs_total.labels(engine=engine_name, script_type=script_type).inc()
 
     if request.regions:
         # Guided mode: recognize only the user-drawn annotation regions
@@ -206,6 +209,7 @@ async def run_ocr_stream(request: OcrRequest):
                         })
 
                     total_blocks += len(blocks)
+                    metrics.ocr_pages_total.labels(engine=engine_name).inc()
                     yield json.dumps({
                         "type": "page",
                         "pageNumber": page_idx,
@@ -260,6 +264,7 @@ async def run_ocr_stream(request: OcrRequest):
                         block["text"] = correct_text(block["text"])
 
                 total_blocks += len(blocks)
+                metrics.ocr_pages_total.labels(engine=engine_name).inc()
                 yield json.dumps({
                     "type": "page",
                     "pageNumber": page_idx,
diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index f034a31c..f91bd0af 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -155,3 +155,35 @@ async def test_ocr_jobs_total_incremented_with_surya_engine_label_for_typewriter
         engine="surya", script_type="TYPEWRITER"
     )._value.get()
     assert value == 1.0
+
+
+@pytest.mark.asyncio
+async def test_ocr_pages_total_incremented_once_per_page_in_stream(fresh_metrics):
+    """The /ocr/stream generator increments ocr_pages_total per successful page."""
+    mock_images = [Image.new("RGB", (100, 100)) for _ in range(3)]
+    mock_blocks = [{"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 1.0, "height": 1.0,
+                    "polygon": None, "text": "hi", "words": []}]
+
+    with patch("main.kraken_engine.load_models"), \
+         patch("main.load_spell_checker"), \
+         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
+         patch("main.preprocess_page", side_effect=lambda img: img), \
+         patch("main.surya_engine.extract_page_blocks", return_value=mock_blocks):
+        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+            import main as main_module
+            main_module._models_ready = True
+            try:
+                async with client.stream("POST", "/ocr/stream", json={
+                    "pdfUrl": "http://minio/doc.pdf",
+                    "scriptType": "TYPEWRITER",
+                    "language": "de",
+                }) as response:
+                    assert response.status_code == 200
+                    # Drain the stream so all per-page increments fire.
+                    async for _ in response.aiter_lines():
+                        pass
+            finally:
+                main_module._models_ready = False
+
+    value = fresh_metrics.ocr_pages_total.labels(engine="surya")._value.get()
+    assert value == 3.0
-- 
2.49.1


From 3fa3460dbfb16b13a972d2ce8d59439ece0ff6bc Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:06:50 +0200
Subject: [PATCH 07/27] feat(ocr): increment ocr_skipped_pages_total on
 per-page engine failure

Bumps the counter in both /ocr/stream except blocks (standard and guided
generators) so the existing skipped_pages local variable now also flows
into Prometheus.

Refs #652 (AC3b)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/main.py         |  2 ++
 ocr-service/test_metrics.py | 43 +++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/ocr-service/main.py b/ocr-service/main.py
index 08990e24..9522d8ae 100644
--- a/ocr-service/main.py
+++ b/ocr-service/main.py
@@ -219,6 +219,7 @@ async def run_ocr_stream(request: OcrRequest):
                 except Exception:
                     logger.exception("Guided OCR failed on page %d", page_idx)
                     skipped_pages += 1
+                    metrics.ocr_skipped_pages_total.inc()
                     yield json.dumps({
                         "type": "error",
                         "pageNumber": page_idx,
@@ -274,6 +275,7 @@ async def run_ocr_stream(request: OcrRequest):
             except Exception:
                 logger.exception("OCR failed on page %d", page_idx)
                 skipped_pages += 1
+                metrics.ocr_skipped_pages_total.inc()
                 yield json.dumps({
                     "type": "error",
                     "pageNumber": page_idx,
diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index f91bd0af..627a78d7 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -187,3 +187,46 @@ async def test_ocr_pages_total_incremented_once_per_page_in_stream(fresh_metrics
 
     value = fresh_metrics.ocr_pages_total.labels(engine="surya")._value.get()
     assert value == 3.0
+
+
+@pytest.mark.asyncio
+async def test_ocr_skipped_pages_total_incremented_when_engine_raises_for_a_page(fresh_metrics):
+    """When the engine raises on a page, ocr_skipped_pages_total bumps and the stream finishes."""
+    mock_images = [Image.new("RGB", (100, 100)) for _ in range(2)]
+    good_blocks = [{"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 1.0, "height": 1.0,
+                    "polygon": None, "text": "ok", "words": []}]
+
+    call_count = {"n": 0}
+
+    def extract_side_effect(*args, **kwargs):
+        call_count["n"] += 1
+        if call_count["n"] == 1:
+            raise RuntimeError("synthetic engine failure")
+        return good_blocks
+
+    with patch("main.kraken_engine.load_models"), \
+         patch("main.load_spell_checker"), \
+         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
+         patch("main.preprocess_page", side_effect=lambda img: img), \
+         patch("main.surya_engine.extract_page_blocks", side_effect=extract_side_effect):
+        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+            import main as main_module
+            main_module._models_ready = True
+            try:
+                async with client.stream("POST", "/ocr/stream", json={
+                    "pdfUrl": "http://minio/doc.pdf",
+                    "scriptType": "TYPEWRITER",
+                    "language": "de",
+                }) as response:
+                    assert response.status_code == 200
+                    saw_error = False
+                    async for line in response.aiter_lines():
+                        if line and '"type": "error"' in line:
+                            saw_error = True
+                    assert saw_error
+            finally:
+                main_module._models_ready = False
+
+    assert fresh_metrics.ocr_skipped_pages_total._value.get() == 1.0
+    # The second page still succeeds.
+    assert fresh_metrics.ocr_pages_total.labels(engine="surya")._value.get() == 1.0
-- 
2.49.1


From 131ed336bc04c40a182e5110bcaeb7576f862ec8 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:07:59 +0200
Subject: [PATCH 08/27] feat(ocr): count words and illegible words at the OCR
 call sites

Walks block["words"] before apply_confidence_markers strips the list, then
increments ocr_words_total by len(words) and ocr_illegible_words_total by
the count below threshold. Same pattern in both /ocr and /ocr/stream so the
ratio illegible/words is a faithful quality signal across endpoints.

Refs #652 (AC4)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/main.py         | 18 ++++++++++++++----
 ocr-service/test_metrics.py | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/ocr-service/main.py b/ocr-service/main.py
index 9522d8ae..12a77a45 100644
--- a/ocr-service/main.py
+++ b/ocr-service/main.py
@@ -124,8 +124,13 @@ async def run_ocr(request: OcrRequest):
 
     threshold = get_threshold(script_type)
     for block in blocks:
-        if block.get("words"):
-            block["text"] = apply_confidence_markers(block["words"], threshold)
+        words = block.get("words") or []
+        if words:
+            metrics.ocr_words_total.inc(len(words))
+            metrics.ocr_illegible_words_total.inc(
+                sum(1 for w in words if w["confidence"] < threshold)
+            )
+            block["text"] = apply_confidence_markers(words, threshold)
         block.pop("words", None)
         if script_type in _SPELL_CHECK_SCRIPT_TYPES:
             block["text"] = correct_text(block["text"])
@@ -258,8 +263,13 @@ async def run_ocr_stream(request: OcrRequest):
                 )
 
                 for block in blocks:
-                    if block.get("words"):
-                        block["text"] = apply_confidence_markers(block["words"], threshold)
+                    words = block.get("words") or []
+                    if words:
+                        metrics.ocr_words_total.inc(len(words))
+                        metrics.ocr_illegible_words_total.inc(
+                            sum(1 for w in words if w["confidence"] < threshold)
+                        )
+                        block["text"] = apply_confidence_markers(words, threshold)
                     block.pop("words", None)
                     if script_type in _SPELL_CHECK_SCRIPT_TYPES:
                         block["text"] = correct_text(block["text"])
diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index 627a78d7..a13db209 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -230,3 +230,40 @@ async def test_ocr_skipped_pages_total_incremented_when_engine_raises_for_a_page
     assert fresh_metrics.ocr_skipped_pages_total._value.get() == 1.0
     # The second page still succeeds.
     assert fresh_metrics.ocr_pages_total.labels(engine="surya")._value.get() == 1.0
+
+
+@pytest.mark.asyncio
+async def test_ocr_words_and_illegible_words_total_sum_across_blocks(fresh_metrics):
+    """Counters reflect totals summed over every block in the request.
+
+    Threshold defaults to THRESHOLD_DEFAULT (0.3) for non-Kurrent scripts. Two
+    blocks: 3 words above + 2 words below threshold across blocks.
+    """
+    mock_images = [Image.new("RGB", (100, 100))]
+    mock_blocks = [
+        {"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 1.0, "height": 1.0,
+         "polygon": None, "text": "ignored",
+         "words": [{"text": "Lieber", "confidence": 0.9},
+                   {"text": "Freund", "confidence": 0.1}]},
+        {"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 1.0, "height": 1.0,
+         "polygon": None, "text": "ignored",
+         "words": [{"text": "Gruss", "confidence": 0.8},
+                   {"text": "verschmiert", "confidence": 0.05},
+                   {"text": "Karl", "confidence": 0.95}]},
+    ]
+
+    with patch("main.kraken_engine.load_models"), \
+         patch("main.load_spell_checker"), \
+         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
+         patch("main.preprocess_page", side_effect=lambda img: img), \
+         patch("main.surya_engine.extract_blocks", return_value=mock_blocks):
+        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+            import main as main_module
+            main_module._models_ready = True
+            try:
+                await _drive_ocr(client, script_type="TYPEWRITER")
+            finally:
+                main_module._models_ready = False
+
+    assert fresh_metrics.ocr_words_total._value.get() == 5.0
+    assert fresh_metrics.ocr_illegible_words_total._value.get() == 2.0
-- 
2.49.1


From 2e3744d9ef6c065ca444e25c39d3a4ac1996dade Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:09:25 +0200
Subject: [PATCH 09/27] feat(ocr): observe ocr_processing_seconds around
 engine.to_thread calls

Wraps every asyncio.to_thread(engine.extract_*) call with time.monotonic()
deltas in /ocr (per document) and in both /ocr/stream generators (per page).
Streaming buckets are the useful operational signal; the non-streaming
observation is a bonus.

Refs #652 (AC5)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/main.py         | 13 ++++++++++++
 ocr-service/test_metrics.py | 40 +++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/ocr-service/main.py b/ocr-service/main.py
index 12a77a45..c9f5b783 100644
--- a/ocr-service/main.py
+++ b/ocr-service/main.py
@@ -10,6 +10,7 @@ import re
 import shutil
 import subprocess
 import tempfile
+import time
 import zipfile
 from contextlib import asynccontextmanager
 from datetime import datetime, timezone
@@ -108,6 +109,7 @@ async def run_ocr(request: OcrRequest):
     script_type = request.scriptType.upper()
     engine_name = "kraken" if script_type == "HANDWRITING_KURRENT" else "surya"
 
+    extract_started = time.monotonic()
     if script_type == "HANDWRITING_KURRENT":
         if not kraken_engine.is_available():
             raise HTTPException(
@@ -119,6 +121,9 @@ async def run_ocr(request: OcrRequest):
     else:
         # TYPEWRITER, HANDWRITING_LATIN, UNKNOWN — all use Surya
         blocks = await asyncio.to_thread(surya_engine.extract_blocks, images, request.language)
+    metrics.ocr_processing_seconds.labels(engine=engine_name).observe(
+        time.monotonic() - extract_started
+    )
 
     metrics.ocr_jobs_total.labels(engine=engine_name, script_type=script_type).inc()
 
@@ -194,6 +199,7 @@ async def run_ocr_stream(request: OcrRequest):
                     image = await asyncio.to_thread(preprocess_page, image)
                     blocks = []
                     sender_path = request.senderModelPath if use_kraken else None
+                    page_started = time.monotonic()
                     for region in page_regions:
                         text = await asyncio.to_thread(
                             engine.extract_region_text, image,
@@ -213,6 +219,9 @@ async def run_ocr_stream(request: OcrRequest):
                             "annotationId": region.annotationId,
                         })
 
+                    metrics.ocr_processing_seconds.labels(engine=engine_name).observe(
+                        time.monotonic() - page_started
+                    )
                     total_blocks += len(blocks)
                     metrics.ocr_pages_total.labels(engine=engine_name).inc()
                     yield json.dumps({
@@ -258,9 +267,13 @@ async def run_ocr_stream(request: OcrRequest):
                 yield json.dumps({"type": "preprocessing", "pageNumber": page_idx}) + "\n"
                 image = await asyncio.to_thread(preprocess_page, image)
                 sender_path = request.senderModelPath if use_kraken else None
+                page_started = time.monotonic()
                 blocks = await asyncio.to_thread(
                     engine.extract_page_blocks, image, page_idx, request.language, sender_path
                 )
+                metrics.ocr_processing_seconds.labels(engine=engine_name).observe(
+                    time.monotonic() - page_started
+                )
 
                 for block in blocks:
                     words = block.get("words") or []
diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index a13db209..5701bbe3 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -267,3 +267,43 @@ async def test_ocr_words_and_illegible_words_total_sum_across_blocks(fresh_metri
 
     assert fresh_metrics.ocr_words_total._value.get() == 5.0
     assert fresh_metrics.ocr_illegible_words_total._value.get() == 2.0
+
+
+def _histogram_count_sum(histogram, **labels) -> tuple[float, float]:
+    """Read the per-label-set _count and _sum from a prometheus_client Histogram."""
+    child = histogram.labels(**labels)
+    return child._sum.get(), sum(b.get() for b in child._buckets)
+
+
+@pytest.mark.asyncio
+async def test_ocr_processing_seconds_histogram_observed_per_page_in_stream(fresh_metrics):
+    """The streaming generator observes ocr_processing_seconds once per page."""
+    mock_images = [Image.new("RGB", (100, 100)) for _ in range(2)]
+    mock_blocks = [{"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 1.0, "height": 1.0,
+                    "polygon": None, "text": "ok", "words": []}]
+
+    with patch("main.kraken_engine.load_models"), \
+         patch("main.load_spell_checker"), \
+         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
+         patch("main.preprocess_page", side_effect=lambda img: img), \
+         patch("main.surya_engine.extract_page_blocks", return_value=mock_blocks):
+        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+            import main as main_module
+            main_module._models_ready = True
+            try:
+                async with client.stream("POST", "/ocr/stream", json={
+                    "pdfUrl": "http://minio/doc.pdf",
+                    "scriptType": "TYPEWRITER",
+                    "language": "de",
+                }) as response:
+                    assert response.status_code == 200
+                    async for _ in response.aiter_lines():
+                        pass
+            finally:
+                main_module._models_ready = False
+
+    sum_seconds, count = _histogram_count_sum(
+        fresh_metrics.ocr_processing_seconds, engine="surya"
+    )
+    assert count == 2.0
+    assert sum_seconds >= 0.0
-- 
2.49.1


From 6c2b9af10bc61b6e5583ea131422ead6ec8831f9 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:12:26 +0200
Subject: [PATCH 10/27] feat(ocr): record training runs in
 ocr_training_runs_total per kind and outcome

Wraps the await asyncio.to_thread(_run_*) calls in /train, /train-sender,
and /segtrain with try/except. Recognition training (/train, /train-sender)
shares kind="recognition"; /segtrain uses kind="segmentation". The
ocr_model_accuracy gauge is set per kind on success.

Refs #652 (AC6, decision #2)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/main.py         | 27 +++++++++++--
 ocr-service/test_metrics.py | 81 +++++++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+), 3 deletions(-)

diff --git a/ocr-service/main.py b/ocr-service/main.py
index c9f5b783..1460a213 100644
--- a/ocr-service/main.py
+++ b/ocr-service/main.py
@@ -478,7 +478,14 @@ async def train_model(
 
             return {"loss": None, "accuracy": accuracy, "cer": cer, "epochs": epochs}
 
-    result = await asyncio.to_thread(_run_training)
+    try:
+        result = await asyncio.to_thread(_run_training)
+    except Exception:
+        metrics.ocr_training_runs_total.labels(kind="recognition", outcome="error").inc()
+        raise
+    metrics.ocr_training_runs_total.labels(kind="recognition", outcome="success").inc()
+    if result.get("accuracy") is not None:
+        metrics.ocr_model_accuracy.labels(kind="recognition").set(result["accuracy"])
     return result
 
 
@@ -558,7 +565,14 @@ async def train_sender_model(
 
             return {"loss": None, "accuracy": accuracy, "cer": cer, "epochs": epochs}
 
-    result = await asyncio.to_thread(_run_sender_training)
+    try:
+        result = await asyncio.to_thread(_run_sender_training)
+    except Exception:
+        metrics.ocr_training_runs_total.labels(kind="recognition", outcome="error").inc()
+        raise
+    metrics.ocr_training_runs_total.labels(kind="recognition", outcome="success").inc()
+    if result.get("accuracy") is not None:
+        metrics.ocr_model_accuracy.labels(kind="recognition").set(result["accuracy"])
     return result
 
 
@@ -668,7 +682,14 @@ async def segtrain_model(
 
             return {"loss": None, "accuracy": accuracy, "cer": cer, "epochs": epochs}
 
-    result = await asyncio.to_thread(_run_segtrain)
+    try:
+        result = await asyncio.to_thread(_run_segtrain)
+    except Exception:
+        metrics.ocr_training_runs_total.labels(kind="segmentation", outcome="error").inc()
+        raise
+    metrics.ocr_training_runs_total.labels(kind="segmentation", outcome="success").inc()
+    if result.get("accuracy") is not None:
+        metrics.ocr_model_accuracy.labels(kind="segmentation").set(result["accuracy"])
     return result
 
 
diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index 5701bbe3..55108fb8 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -4,6 +4,8 @@ Each test that asserts on a counter/gauge value uses a fresh CollectorRegistry
 (see decision #3 on issue #652) to keep the metrics isolated between tests.
 """
 
+import io
+import zipfile
 from unittest.mock import AsyncMock, patch
 
 import pytest
@@ -15,6 +17,18 @@ from main import app
 from metrics import build_metrics
 
 
+def _minimal_zip() -> bytes:
+    """Return a ZIP containing one fake .xml so endpoint validation passes."""
+    buf = io.BytesIO()
+    with zipfile.ZipFile(buf, "w") as zf:
+        zf.writestr("page_01.xml", "<PcGts/>")
+    return buf.getvalue()
+
+
+def _fake_training_result(accuracy: float = 0.91) -> dict:
+    return {"loss": None, "accuracy": accuracy, "cer": round(1 - accuracy, 4), "epochs": 5}
+
+
 @pytest.fixture
 def fresh_metrics(monkeypatch):
     """Replace the module-level `main.metrics` with one bound to a fresh registry."""
@@ -307,3 +321,70 @@ async def test_ocr_processing_seconds_histogram_observed_per_page_in_stream(fres
     )
     assert count == 2.0
     assert sum_seconds >= 0.0
+
+
+@pytest.mark.asyncio
+async def test_ocr_training_runs_total_incremented_with_recognition_success_label(fresh_metrics):
+    """/train success increments ocr_training_runs_total{kind=recognition, outcome=success}."""
+    async def fake_to_thread(func, *args, **kwargs):
+        return _fake_training_result()
+
+    with patch("main.TRAINING_TOKEN", "secret-token"), \
+         patch("main._models_ready", True), \
+         patch("main.asyncio.to_thread", side_effect=fake_to_thread):
+        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+            response = await client.post(
+                "/train",
+                files={"file": ("training.zip", _minimal_zip(), "application/zip")},
+                headers={"X-Training-Token": "secret-token"},
+            )
+
+    assert response.status_code == 200
+    assert fresh_metrics.ocr_training_runs_total.labels(
+        kind="recognition", outcome="success"
+    )._value.get() == 1.0
+
+
+@pytest.mark.asyncio
+async def test_ocr_training_runs_total_incremented_with_recognition_error_label(fresh_metrics):
+    """When /train's inner runner raises, the error counter bumps and the exception propagates."""
+    async def fake_to_thread(func, *args, **kwargs):
+        raise RuntimeError("ketos train failed (exit 1): synthetic")
+
+    with patch("main.TRAINING_TOKEN", "secret-token"), \
+         patch("main._models_ready", True), \
+         patch("main.asyncio.to_thread", side_effect=fake_to_thread):
+        transport = ASGITransport(app=app, raise_app_exceptions=False)
+        async with AsyncClient(transport=transport, base_url="http://test") as client:
+            response = await client.post(
+                "/train",
+                files={"file": ("training.zip", _minimal_zip(), "application/zip")},
+                headers={"X-Training-Token": "secret-token"},
+            )
+
+    assert response.status_code == 500
+    assert fresh_metrics.ocr_training_runs_total.labels(
+        kind="recognition", outcome="error"
+    )._value.get() == 1.0
+
+
+@pytest.mark.asyncio
+async def test_ocr_training_runs_total_incremented_with_segmentation_success_label(fresh_metrics):
+    """/segtrain success increments ocr_training_runs_total{kind=segmentation, outcome=success}."""
+    async def fake_to_thread(func, *args, **kwargs):
+        return _fake_training_result(accuracy=0.83)
+
+    with patch("main.TRAINING_TOKEN", "secret-token"), \
+         patch("main._models_ready", True), \
+         patch("main.asyncio.to_thread", side_effect=fake_to_thread):
+        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+            response = await client.post(
+                "/segtrain",
+                files={"file": ("training.zip", _minimal_zip(), "application/zip")},
+                headers={"X-Training-Token": "secret-token"},
+            )
+
+    assert response.status_code == 200
+    assert fresh_metrics.ocr_training_runs_total.labels(
+        kind="segmentation", outcome="success"
+    )._value.get() == 1.0
-- 
2.49.1


From 77d59c5d835e72a771fdee43ad4c14c9c554e5af Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:13:05 +0200
Subject: [PATCH 11/27] test(ocr): assert ocr_model_accuracy gauge is set per
 kind on success

Hits /train then /segtrain through the same test, each with a distinct
mocked accuracy, and asserts the labelled gauges reflect the two values.
Locks down the kind-label separation between recognition and segmentation
accuracy (decision #2).

Refs #652 (AC6)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/test_metrics.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index 55108fb8..7d154102 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -388,3 +388,37 @@ async def test_ocr_training_runs_total_incremented_with_segmentation_success_lab
     assert fresh_metrics.ocr_training_runs_total.labels(
         kind="segmentation", outcome="success"
     )._value.get() == 1.0
+
+
+@pytest.mark.asyncio
+async def test_ocr_model_accuracy_gauge_set_per_kind_after_successful_training(fresh_metrics):
+    """After /train and /segtrain succeed, ocr_model_accuracy{kind=...} reflects the result."""
+    recognition_accuracy = 0.917
+    segmentation_accuracy = 0.834
+
+    async def fake_recognition_to_thread(func, *args, **kwargs):
+        return _fake_training_result(accuracy=recognition_accuracy)
+
+    async def fake_segmentation_to_thread(func, *args, **kwargs):
+        return _fake_training_result(accuracy=segmentation_accuracy)
+
+    with patch("main.TRAINING_TOKEN", "secret-token"), \
+         patch("main._models_ready", True):
+        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+            with patch("main.asyncio.to_thread", side_effect=fake_recognition_to_thread):
+                rec_resp = await client.post(
+                    "/train",
+                    files={"file": ("training.zip", _minimal_zip(), "application/zip")},
+                    headers={"X-Training-Token": "secret-token"},
+                )
+            assert rec_resp.status_code == 200
+            with patch("main.asyncio.to_thread", side_effect=fake_segmentation_to_thread):
+                seg_resp = await client.post(
+                    "/segtrain",
+                    files={"file": ("training.zip", _minimal_zip(), "application/zip")},
+                    headers={"X-Training-Token": "secret-token"},
+                )
+            assert seg_resp.status_code == 200
+
+    assert fresh_metrics.ocr_model_accuracy.labels(kind="recognition")._value.get() == pytest.approx(recognition_accuracy)
+    assert fresh_metrics.ocr_model_accuracy.labels(kind="segmentation")._value.get() == pytest.approx(segmentation_accuracy)
-- 
2.49.1


From d6abf990c71b524a91fbae8d319f085c68037eeb Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:15:11 +0200
Subject: [PATCH 12/27] feat(ocr): flip ocr_models_ready to 1 once the lifespan
 startup finishes

Mirrors the existing _models_ready bool so Prometheus has a time-series
liveness/readiness signal for future alerting rules (e.g.
ocr_models_ready < 1 for 2m).

Refs #652 (AC7)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/main.py         |  1 +
 ocr-service/test_metrics.py | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/ocr-service/main.py b/ocr-service/main.py
index 1460a213..f7dc495d 100644
--- a/ocr-service/main.py
+++ b/ocr-service/main.py
@@ -69,6 +69,7 @@ async def lifespan(app: FastAPI):
     kraken_engine.load_models()
     load_spell_checker()
     _models_ready = True
+    metrics.ocr_models_ready.set(1)
     logger.info("Startup complete — ready to accept requests")
 
     yield
diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index 7d154102..5c472dbe 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -422,3 +422,23 @@ async def test_ocr_model_accuracy_gauge_set_per_kind_after_successful_training(f
 
     assert fresh_metrics.ocr_model_accuracy.labels(kind="recognition")._value.get() == pytest.approx(recognition_accuracy)
     assert fresh_metrics.ocr_model_accuracy.labels(kind="segmentation")._value.get() == pytest.approx(segmentation_accuracy)
+
+
+def test_ocr_models_ready_gauge_defaults_to_zero():
+    """A freshly-built OcrMetrics has ocr_models_ready=0 before lifespan runs."""
+    metrics = build_metrics(CollectorRegistry())
+    assert metrics.ocr_models_ready._value.get() == 0.0
+
+
+@pytest.mark.asyncio
+async def test_ocr_models_ready_gauge_is_one_after_lifespan_startup(fresh_metrics):
+    """The lifespan flips ocr_models_ready to 1 once load_models / load_spell_checker return.
+
+    ASGITransport does not run lifespan by default, so the lifespan context
+    manager is driven directly to exercise the startup code path.
+    """
+    assert fresh_metrics.ocr_models_ready._value.get() == 0.0
+    with patch("main.kraken_engine.load_models"), \
+         patch("main.load_spell_checker"):
+        async with app.router.lifespan_context(app):
+            assert fresh_metrics.ocr_models_ready._value.get() == 1.0
-- 
2.49.1


From 525f091b3a707046ef298352a466b0839dedac36 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:16:14 +0200
Subject: [PATCH 13/27] feat(ocr): suppress uvicorn access logs for /metrics
 and /health

Adds a logging.Filter on uvicorn.access that drops records whose request
path is /metrics or /health. Each is hit on a tight schedule (Prometheus
scrape interval and Docker healthcheck), so unfiltered they dominate the
access log without carrying any information about real traffic.

Refs #652 (Nora's recommendation)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/main.py         | 17 +++++++++++++++++
 ocr-service/test_metrics.py | 30 ++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/ocr-service/main.py b/ocr-service/main.py
index f7dc495d..bf26ef35 100644
--- a/ocr-service/main.py
+++ b/ocr-service/main.py
@@ -82,6 +82,23 @@ app = FastAPI(title="Familienarchiv OCR Service", lifespan=lifespan)
 Instrumentator(excluded_handlers=["/health", "/metrics"]).instrument(app).expose(app)
 
 
+class MetricsPathFilter(logging.Filter):
+    """Drop uvicorn.access entries for /metrics and /health to keep logs focused."""
+
+    _SUPPRESSED_PATHS = {"/metrics", "/health"}
+
+    def filter(self, record: logging.LogRecord) -> bool:
+        # uvicorn.access formats as: '%s - "%s %s HTTP/%s" %d'
+        if record.args and len(record.args) >= 3:
+            path = record.args[2]
+            if isinstance(path, str) and path in self._SUPPRESSED_PATHS:
+                return False
+        return True
+
+
+logging.getLogger("uvicorn.access").addFilter(MetricsPathFilter())
+
+
 @app.get("/health")
 def health():
     """Health endpoint — returns 200 only after models are loaded."""
diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index 5c472dbe..271b57d8 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -442,3 +442,33 @@ async def test_ocr_models_ready_gauge_is_one_after_lifespan_startup(fresh_metric
          patch("main.load_spell_checker"):
         async with app.router.lifespan_context(app):
             assert fresh_metrics.ocr_models_ready._value.get() == 1.0
+
+
+def test_uvicorn_access_log_filter_skips_metrics_path():
+    """The MetricsPathFilter drops uvicorn.access log records that target /metrics."""
+    import logging as _logging
+    from main import MetricsPathFilter
+
+    filt = MetricsPathFilter()
+    metrics_record = _logging.LogRecord(
+        name="uvicorn.access", level=_logging.INFO, pathname="", lineno=0,
+        msg='%s - "%s %s HTTP/%s" %d',
+        args=("127.0.0.1:1234", "GET", "/metrics", "1.1", 200),
+        exc_info=None,
+    )
+    health_record = _logging.LogRecord(
+        name="uvicorn.access", level=_logging.INFO, pathname="", lineno=0,
+        msg='%s - "%s %s HTTP/%s" %d',
+        args=("127.0.0.1:1234", "GET", "/health", "1.1", 200),
+        exc_info=None,
+    )
+    ocr_record = _logging.LogRecord(
+        name="uvicorn.access", level=_logging.INFO, pathname="", lineno=0,
+        msg='%s - "%s %s HTTP/%s" %d',
+        args=("127.0.0.1:1234", "POST", "/ocr", "1.1", 200),
+        exc_info=None,
+    )
+
+    assert filt.filter(metrics_record) is False
+    assert filt.filter(health_record) is False
+    assert filt.filter(ocr_record) is True
-- 
2.49.1


From e75ac8ec451f967f940899fde3bf32c72d345f9f Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:16:51 +0200
Subject: [PATCH 14/27] ops(observability): drop TODO from ocr-service scrape
 job in prometheus.yml
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The TODO was a placeholder for this work — the OCR service now exposes
/metrics so the target will flip from DOWN to UP on next image rebuild.

Refs #652

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 infra/observability/prometheus/prometheus.yml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/infra/observability/prometheus/prometheus.yml b/infra/observability/prometheus/prometheus.yml
index a29cc75b..4838bc1c 100644
--- a/infra/observability/prometheus/prometheus.yml
+++ b/infra/observability/prometheus/prometheus.yml
@@ -20,7 +20,4 @@ scrape_configs:
   - job_name: ocr-service
     metrics_path: /metrics
     static_configs:
-      # TODO: remove or add prometheus-client to ocr-service.
-      # The Python OCR service does not currently expose Prometheus metrics.
-      # This target will show as DOWN until prometheus-client is added to ocr-service.
       - targets: ['ocr:8000']
-- 
2.49.1


From ebaedb1af0107025847dcd09ce3c10fe177bfeb0 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:51:23 +0200
Subject: [PATCH 15/27] test(ocr): assert ocr_jobs_total stays zero when stream
 download fails

Locks in the post-download placement of the counter increment so a
regression that moves it back above _download_and_convert_pdf would fail.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/test_metrics.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index 271b57d8..783e1d25 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -444,6 +444,38 @@ async def test_ocr_models_ready_gauge_is_one_after_lifespan_startup(fresh_metric
             assert fresh_metrics.ocr_models_ready._value.get() == 1.0
 
 
+@pytest.mark.asyncio
+async def test_ocr_jobs_total_not_incremented_when_pdf_download_fails_in_stream(fresh_metrics):
+    """If `_download_and_convert_pdf` raises, ocr_jobs_total is NOT incremented.
+
+    Mirrors the /ocr endpoint's semantics: the counter only records jobs that
+    actually started OCR work, not failed downloads.
+    """
+    async def fail_download(url):
+        raise RuntimeError("synthetic download failure")
+
+    with patch("main.kraken_engine.load_models"), \
+         patch("main.load_spell_checker"), \
+         patch("main._download_and_convert_pdf", new=fail_download):
+        transport = ASGITransport(app=app, raise_app_exceptions=False)
+        async with AsyncClient(transport=transport, base_url="http://test") as client:
+            import main as main_module
+            main_module._models_ready = True
+            try:
+                response = await client.post("/ocr/stream", json={
+                    "pdfUrl": "http://minio/doc.pdf",
+                    "scriptType": "TYPEWRITER",
+                    "language": "de",
+                })
+            finally:
+                main_module._models_ready = False
+
+    assert response.status_code == 500
+    assert fresh_metrics.ocr_jobs_total.labels(
+        engine="surya", script_type="TYPEWRITER"
+    )._value.get() == 0.0
+
+
 def test_uvicorn_access_log_filter_skips_metrics_path():
     """The MetricsPathFilter drops uvicorn.access log records that target /metrics."""
     import logging as _logging
-- 
2.49.1


From 74ddf16b01cfdd727eb4465347b2f4bc0b92d609 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:53:04 +0200
Subject: [PATCH 16/27] feat(ocr): time only engine work in guided stream
 histogram

Previously the guided generator's page_started timer wrapped the entire
region loop including the synchronous correct_text() call, inflating
ocr_processing_seconds with spell-check latency. Sum the per-region
engine.extract_region_text durations instead so the histogram matches
the unguided stream's "engine only" semantic.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/main.py         |  6 ++-
 ocr-service/test_metrics.py | 83 +++++++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+), 2 deletions(-)

diff --git a/ocr-service/main.py b/ocr-service/main.py
index bf26ef35..31fd9cad 100644
--- a/ocr-service/main.py
+++ b/ocr-service/main.py
@@ -217,13 +217,15 @@ async def run_ocr_stream(request: OcrRequest):
                     image = await asyncio.to_thread(preprocess_page, image)
                     blocks = []
                     sender_path = request.senderModelPath if use_kraken else None
-                    page_started = time.monotonic()
+                    engine_seconds = 0.0
                     for region in page_regions:
+                        region_started = time.monotonic()
                         text = await asyncio.to_thread(
                             engine.extract_region_text, image,
                             region.x, region.y, region.width, region.height,
                             sender_path,
                         )
+                        engine_seconds += time.monotonic() - region_started
                         if script_type in _SPELL_CHECK_SCRIPT_TYPES:
                             text = correct_text(text)
                         blocks.append({
@@ -238,7 +240,7 @@ async def run_ocr_stream(request: OcrRequest):
                         })
 
                     metrics.ocr_processing_seconds.labels(engine=engine_name).observe(
-                        time.monotonic() - page_started
+                        engine_seconds
                     )
                     total_blocks += len(blocks)
                     metrics.ocr_pages_total.labels(engine=engine_name).inc()
diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index 783e1d25..3ed02c31 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -444,6 +444,89 @@ async def test_ocr_models_ready_gauge_is_one_after_lifespan_startup(fresh_metric
             assert fresh_metrics.ocr_models_ready._value.get() == 1.0
 
 
+@pytest.mark.asyncio
+async def test_ocr_processing_seconds_histogram_observed_per_page_in_guided_stream(fresh_metrics):
+    """The guided streaming generator observes ocr_processing_seconds once per page."""
+    mock_images = [Image.new("RGB", (100, 100)) for _ in range(2)]
+    regions = [
+        {"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 0.5, "height": 0.5, "annotationId": "a1"},
+        {"pageNumber": 2, "x": 0.0, "y": 0.0, "width": 1.0, "height": 1.0, "annotationId": "a2"},
+    ]
+
+    with patch("main.kraken_engine.load_models"), \
+         patch("main.load_spell_checker"), \
+         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
+         patch("main.preprocess_page", side_effect=lambda img: img), \
+         patch("main.surya_engine.extract_region_text", return_value="text"):
+        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+            import main as main_module
+            main_module._models_ready = True
+            try:
+                async with client.stream("POST", "/ocr/stream", json={
+                    "pdfUrl": "http://minio/doc.pdf",
+                    "scriptType": "TYPEWRITER",
+                    "language": "de",
+                    "regions": regions,
+                }) as response:
+                    assert response.status_code == 200
+                    async for _ in response.aiter_lines():
+                        pass
+            finally:
+                main_module._models_ready = False
+
+    sum_seconds, count = _histogram_count_sum(
+        fresh_metrics.ocr_processing_seconds, engine="surya"
+    )
+    assert count == 2.0
+    assert sum_seconds >= 0.0
+
+
+@pytest.mark.asyncio
+async def test_ocr_processing_seconds_histogram_excludes_spell_check_time_in_guided_stream(fresh_metrics):
+    """The guided observation must time engine work only, not the spell-check pass."""
+    mock_images = [Image.new("RGB", (100, 100))]
+    regions = [
+        {"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 0.5, "height": 0.5, "annotationId": "a1"},
+        {"pageNumber": 1, "x": 0.5, "y": 0.0, "width": 0.5, "height": 0.5, "annotationId": "a2"},
+    ]
+
+    def slow_correct(text):
+        import time as _time
+        _time.sleep(0.05)
+        return text
+
+    with patch("main.kraken_engine.load_models"), \
+         patch("main.load_spell_checker"), \
+         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
+         patch("main.preprocess_page", side_effect=lambda img: img), \
+         patch("main.kraken_engine.is_available", return_value=True), \
+         patch("main.kraken_engine.extract_region_text", return_value="text"), \
+         patch("main.correct_text", side_effect=slow_correct):
+        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+            import main as main_module
+            main_module._models_ready = True
+            try:
+                async with client.stream("POST", "/ocr/stream", json={
+                    "pdfUrl": "http://minio/doc.pdf",
+                    "scriptType": "HANDWRITING_KURRENT",
+                    "language": "de",
+                    "regions": regions,
+                }) as response:
+                    assert response.status_code == 200
+                    async for _ in response.aiter_lines():
+                        pass
+            finally:
+                main_module._models_ready = False
+
+    sum_seconds, _ = _histogram_count_sum(
+        fresh_metrics.ocr_processing_seconds, engine="kraken"
+    )
+    # Spell-check sleeps 0.05s per region × 2 regions = 0.1s; engine work is instantaneous.
+    # If timing included spell-check, sum_seconds would be >= 0.1s. Allow 30ms slack
+    # for scheduler overhead.
+    assert sum_seconds < 0.05, f"timing must exclude spell-check; got sum={sum_seconds}"
+
+
 @pytest.mark.asyncio
 async def test_ocr_jobs_total_not_incremented_when_pdf_download_fails_in_stream(fresh_metrics):
     """If `_download_and_convert_pdf` raises, ocr_jobs_total is NOT incremented.
-- 
2.49.1


From 549cb15845706be5dc151303bb9d777c5d2e60a9 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:53:48 +0200
Subject: [PATCH 17/27] test(ocr): cover /train-sender counter and
 accuracy=None gauge default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two regression tests:
- /train-sender hitting the success path bumps the recognition counter
  (previously only /train and /segtrain were covered).
- A successful run whose result.accuracy is None must not call set() on
  ocr_model_accuracy — the gauge stays at its default 0.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/test_metrics.py | 46 +++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index 3ed02c31..6aa047e0 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -390,6 +390,52 @@ async def test_ocr_training_runs_total_incremented_with_segmentation_success_lab
     )._value.get() == 1.0
 
 
+@pytest.mark.asyncio
+async def test_ocr_training_runs_total_incremented_with_recognition_success_label_for_train_sender(fresh_metrics):
+    """/train-sender success increments ocr_training_runs_total{kind=recognition, outcome=success}."""
+    async def fake_to_thread(func, *args, **kwargs):
+        return _fake_training_result()
+
+    with patch("main.TRAINING_TOKEN", "secret-token"), \
+         patch("main._models_ready", True), \
+         patch("main.asyncio.to_thread", side_effect=fake_to_thread):
+        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+            response = await client.post(
+                "/train-sender",
+                files={"file": ("training.zip", _minimal_zip(), "application/zip")},
+                data={"output_model_path": "/app/models/sender_test.mlmodel"},
+                headers={"X-Training-Token": "secret-token"},
+            )
+
+    assert response.status_code == 200, response.text
+    assert fresh_metrics.ocr_training_runs_total.labels(
+        kind="recognition", outcome="success"
+    )._value.get() == 1.0
+
+
+@pytest.mark.asyncio
+async def test_ocr_model_accuracy_gauge_stays_default_when_training_returns_no_accuracy(fresh_metrics):
+    """When the runner returns accuracy=None, ocr_model_accuracy must remain at its default 0."""
+    async def fake_to_thread(func, *args, **kwargs):
+        return {"loss": None, "accuracy": None, "cer": None, "epochs": 5}
+
+    with patch("main.TRAINING_TOKEN", "secret-token"), \
+         patch("main._models_ready", True), \
+         patch("main.asyncio.to_thread", side_effect=fake_to_thread):
+        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
+            response = await client.post(
+                "/train",
+                files={"file": ("training.zip", _minimal_zip(), "application/zip")},
+                headers={"X-Training-Token": "secret-token"},
+            )
+
+    assert response.status_code == 200
+    # Gauge was never .set() — accessing the label child still creates it with default 0.0.
+    assert fresh_metrics.ocr_model_accuracy.labels(
+        kind="recognition"
+    )._value.get() == 0.0
+
+
 @pytest.mark.asyncio
 async def test_ocr_model_accuracy_gauge_set_per_kind_after_successful_training(fresh_metrics):
     """After /train and /segtrain succeed, ocr_model_accuracy{kind=...} reflects the result."""
-- 
2.49.1


From 0fc0cbcffdc8bab071df74cc0d7899d2d17c2b84 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:54:24 +0200
Subject: [PATCH 18/27] test(ocr): lock in MetricsPathFilter fail-open behavior

If uvicorn's access log format ever changes (args=None, or shorter
than 3 elements), the filter must keep forwarding records rather than
silently dropping them. Two extra LogRecords cover both edge cases.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/test_metrics.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index 6aa047e0..d8099a24 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -605,6 +605,29 @@ async def test_ocr_jobs_total_not_incremented_when_pdf_download_fails_in_stream(
     )._value.get() == 0.0
 
 
+def test_uvicorn_access_log_filter_fails_open_on_short_or_missing_args():
+    """The filter must default-allow records when args is None or shorter than expected.
+
+    Locks in fail-open behavior: if uvicorn ever changes its format we keep
+    forwarding records to the handler rather than silently dropping logs.
+    """
+    import logging as _logging
+    from main import MetricsPathFilter
+
+    filt = MetricsPathFilter()
+    none_record = _logging.LogRecord(
+        name="uvicorn.access", level=_logging.INFO, pathname="", lineno=0,
+        msg="some message", args=None, exc_info=None,
+    )
+    short_record = _logging.LogRecord(
+        name="uvicorn.access", level=_logging.INFO, pathname="", lineno=0,
+        msg="%s %s", args=("a", "b"), exc_info=None,
+    )
+
+    assert filt.filter(none_record) is True
+    assert filt.filter(short_record) is True
+
+
 def test_uvicorn_access_log_filter_skips_metrics_path():
     """The MetricsPathFilter drops uvicorn.access log records that target /metrics."""
     import logging as _logging
-- 
2.49.1


From 0179e93a4b6551682dd00b3dd84b37242e2bc0b3 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:55:14 +0200
Subject: [PATCH 19/27] test(ocr): narrow training error test to subprocess.run
 seam
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The asyncio.to_thread patch stubbed out the entire _run_training call,
hiding the real error path. Replacing it with a failing CompletedProcess
from subprocess.run exercises the actual ketos-failed branch and keeps
the test's intent — error counter bumps, 500 surfaces — intact.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/test_metrics.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index d8099a24..1e6ab7fa 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -347,13 +347,21 @@ async def test_ocr_training_runs_total_incremented_with_recognition_success_labe
 
 @pytest.mark.asyncio
 async def test_ocr_training_runs_total_incremented_with_recognition_error_label(fresh_metrics):
-    """When /train's inner runner raises, the error counter bumps and the exception propagates."""
-    async def fake_to_thread(func, *args, **kwargs):
-        raise RuntimeError("ketos train failed (exit 1): synthetic")
+    """When ketos exits non-zero, the error counter bumps and the exception propagates.
+
+    Uses the narrowest available seam — `subprocess.run` returning a failing
+    CompletedProcess — instead of stubbing the asyncio.to_thread boundary,
+    so the test exercises the real _run_training error path.
+    """
+    from subprocess import CompletedProcess
+
+    failing_proc = CompletedProcess(
+        args=["ketos"], returncode=1, stdout="", stderr="synthetic ketos failure"
+    )
 
     with patch("main.TRAINING_TOKEN", "secret-token"), \
          patch("main._models_ready", True), \
-         patch("main.asyncio.to_thread", side_effect=fake_to_thread):
+         patch("main.subprocess.run", return_value=failing_proc):
         transport = ASGITransport(app=app, raise_app_exceptions=False)
         async with AsyncClient(transport=transport, base_url="http://test") as client:
             response = await client.post(
-- 
2.49.1


From 22a5ee816af65b5e9757b7fa9651483e673bbeb1 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:57:18 +0200
Subject: [PATCH 20/27] refactor(ocr): extract _observe_block_words for word
 counter sites
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The two block-iteration loops (/ocr and /ocr/stream's standard generator)
both ran the same word-total and illegible-word increments. Lift them
into a single helper so each call site becomes one line and the counter
intent reads cleanly. Pure refactor — no behavior change, tests stay green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/main.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/ocr-service/main.py b/ocr-service/main.py
index 31fd9cad..51cf737b 100644
--- a/ocr-service/main.py
+++ b/ocr-service/main.py
@@ -50,6 +50,18 @@ ALLOWED_PDF_HOSTS = set(
 _SPELL_CHECK_SCRIPT_TYPES = {"HANDWRITING_KURRENT", "HANDWRITING_LATIN"}
 
 
+def _observe_block_words(words: list[dict], threshold: float) -> None:
+    """Record per-block word counts and below-threshold word counts.
+
+    Pre: `words` is non-empty. Caller checks for that — keeping the helper
+    branch-free makes the call sites read as a single line.
+    """
+    metrics.ocr_words_total.inc(len(words))
+    metrics.ocr_illegible_words_total.inc(
+        sum(1 for w in words if w["confidence"] < threshold)
+    )
+
+
 def _validate_url(url: str) -> None:
     """Validate that the PDF URL points to an allowed host (SSRF protection)."""
     parsed = urlparse(url)
@@ -149,10 +161,7 @@ async def run_ocr(request: OcrRequest):
     for block in blocks:
         words = block.get("words") or []
         if words:
-            metrics.ocr_words_total.inc(len(words))
-            metrics.ocr_illegible_words_total.inc(
-                sum(1 for w in words if w["confidence"] < threshold)
-            )
+            _observe_block_words(words, threshold)
             block["text"] = apply_confidence_markers(words, threshold)
         block.pop("words", None)
         if script_type in _SPELL_CHECK_SCRIPT_TYPES:
@@ -298,10 +307,7 @@ async def run_ocr_stream(request: OcrRequest):
                 for block in blocks:
                     words = block.get("words") or []
                     if words:
-                        metrics.ocr_words_total.inc(len(words))
-                        metrics.ocr_illegible_words_total.inc(
-                            sum(1 for w in words if w["confidence"] < threshold)
-                        )
+                        _observe_block_words(words, threshold)
                         block["text"] = apply_confidence_markers(words, threshold)
                     block.pop("words", None)
                     if script_type in _SPELL_CHECK_SCRIPT_TYPES:
-- 
2.49.1


From df952861c4605dc5ec2721c6b7b9ffe43276fec6 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 16:58:40 +0200
Subject: [PATCH 21/27] refactor(ocr): extract _record_training for shared
 metric bookkeeping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The /train, /train-sender, and /segtrain endpoints each duplicated the
same eight-line try/except + counter + gauge block around the
asyncio.to_thread call. Lift it into _record_training(runner, kind),
which accepts a sync- or async-returning callable for flexibility.
Each endpoint now ends with a single return line. Behaviour preserved —
status codes, error propagation, and metric labels stay identical.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/main.py | 58 ++++++++++++++++++++++++---------------------
 1 file changed, 31 insertions(+), 27 deletions(-)

diff --git a/ocr-service/main.py b/ocr-service/main.py
index 51cf737b..489fd98e 100644
--- a/ocr-service/main.py
+++ b/ocr-service/main.py
@@ -2,6 +2,7 @@
 
 import asyncio
 import glob
+import inspect
 import io
 import json
 import logging
@@ -14,6 +15,7 @@ import time
 import zipfile
 from contextlib import asynccontextmanager
 from datetime import datetime, timezone
+from typing import Awaitable, Callable
 from urllib.parse import urlparse
 
 import httpx
@@ -50,6 +52,30 @@ ALLOWED_PDF_HOSTS = set(
 _SPELL_CHECK_SCRIPT_TYPES = {"HANDWRITING_KURRENT", "HANDWRITING_LATIN"}
 
 
+async def _record_training(
+    runner: Callable[[], Awaitable[dict] | dict],
+    kind: str,
+) -> dict:
+    """Run a training callable and record outcome + accuracy metrics.
+
+    Wraps the per-endpoint try/except + outcome counter + accuracy gauge
+    block that used to be repeated at /train, /train-sender, and /segtrain.
+    The runner returns a dict with at least an `accuracy` key; if its value
+    is None, the gauge is left at its default.
+    """
+    try:
+        result = runner()
+        if inspect.isawaitable(result):
+            result = await result
+    except Exception:
+        metrics.ocr_training_runs_total.labels(kind=kind, outcome="error").inc()
+        raise
+    metrics.ocr_training_runs_total.labels(kind=kind, outcome="success").inc()
+    if result.get("accuracy") is not None:
+        metrics.ocr_model_accuracy.labels(kind=kind).set(result["accuracy"])
+    return result
+
+
 def _observe_block_words(words: list[dict], threshold: float) -> None:
     """Record per-block word counts and below-threshold word counts.
 
@@ -504,15 +530,7 @@ async def train_model(
 
             return {"loss": None, "accuracy": accuracy, "cer": cer, "epochs": epochs}
 
-    try:
-        result = await asyncio.to_thread(_run_training)
-    except Exception:
-        metrics.ocr_training_runs_total.labels(kind="recognition", outcome="error").inc()
-        raise
-    metrics.ocr_training_runs_total.labels(kind="recognition", outcome="success").inc()
-    if result.get("accuracy") is not None:
-        metrics.ocr_model_accuracy.labels(kind="recognition").set(result["accuracy"])
-    return result
+    return await _record_training(lambda: asyncio.to_thread(_run_training), kind="recognition")
 
 
 @app.post("/train-sender")
@@ -591,15 +609,9 @@ async def train_sender_model(
 
             return {"loss": None, "accuracy": accuracy, "cer": cer, "epochs": epochs}
 
-    try:
-        result = await asyncio.to_thread(_run_sender_training)
-    except Exception:
-        metrics.ocr_training_runs_total.labels(kind="recognition", outcome="error").inc()
-        raise
-    metrics.ocr_training_runs_total.labels(kind="recognition", outcome="success").inc()
-    if result.get("accuracy") is not None:
-        metrics.ocr_model_accuracy.labels(kind="recognition").set(result["accuracy"])
-    return result
+    return await _record_training(
+        lambda: asyncio.to_thread(_run_sender_training), kind="recognition"
+    )
 
 
 @app.post("/segtrain")
@@ -708,15 +720,7 @@ async def segtrain_model(
 
             return {"loss": None, "accuracy": accuracy, "cer": cer, "epochs": epochs}
 
-    try:
-        result = await asyncio.to_thread(_run_segtrain)
-    except Exception:
-        metrics.ocr_training_runs_total.labels(kind="segmentation", outcome="error").inc()
-        raise
-    metrics.ocr_training_runs_total.labels(kind="segmentation", outcome="success").inc()
-    if result.get("accuracy") is not None:
-        metrics.ocr_model_accuracy.labels(kind="segmentation").set(result["accuracy"])
-    return result
+    return await _record_training(lambda: asyncio.to_thread(_run_segtrain), kind="segmentation")
 
 
 async def _download_and_convert_pdf(url: str) -> list[Image.Image]:
-- 
2.49.1


From ddf6cf4cbca0360a5310ecd1642ec56de35ddec9 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 17:03:29 +0200
Subject: [PATCH 22/27] test(ocr): collapse shared client setup into ocr_client
 helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Each metrics test was repeating the same five-line block — patch
kraken_engine.load_models, patch load_spell_checker, instantiate the
AsyncClient, force _models_ready True, restore it. Lift the lot into a
single async context manager so each test body shrinks to its real
arrange / act / assert intent.

Tests that drive the lifespan directly (models_ready gauge) or stub
asyncio.to_thread for /train (which already patches _models_ready) stay
unchanged.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/test_metrics.py | 255 +++++++++++++++---------------------
 1 file changed, 102 insertions(+), 153 deletions(-)

diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index 1e6ab7fa..253cae14 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -4,6 +4,7 @@ Each test that asserts on a counter/gauge value uses a fresh CollectorRegistry
 (see decision #3 on issue #652) to keep the metrics isolated between tests.
 """
 
+import contextlib
 import io
 import zipfile
 from unittest.mock import AsyncMock, patch
@@ -17,6 +18,26 @@ from main import app
 from metrics import build_metrics
 
 
+@contextlib.asynccontextmanager
+async def ocr_client(*, raise_app_exceptions: bool = True):
+    """Yield an AsyncClient with model-loaders patched and _models_ready forced on.
+
+    The shared setup for almost every metrics test: stub the heavy lifecycle
+    hooks (kraken_engine.load_models, load_spell_checker), flip the readiness
+    flag so request handlers do not 503, and restore it afterwards.
+    """
+    with patch("main.kraken_engine.load_models"), \
+         patch("main.load_spell_checker"):
+        transport = ASGITransport(app=app, raise_app_exceptions=raise_app_exceptions)
+        async with AsyncClient(transport=transport, base_url="http://test") as client:
+            import main as main_module
+            main_module._models_ready = True
+            try:
+                yield client
+            finally:
+                main_module._models_ready = False
+
+
 def _minimal_zip() -> bytes:
     """Return a ZIP containing one fake .xml so endpoint validation passes."""
     buf = io.BytesIO()
@@ -57,26 +78,18 @@ async def test_metrics_includes_http_request_metrics_after_ocr_call():
     mock_blocks = [{"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 1.0, "height": 1.0,
                     "polygon": None, "text": "hi", "words": []}]
 
-    with patch("main.kraken_engine.load_models"), \
-         patch("main.load_spell_checker"), \
-         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
+    with patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
          patch("main.preprocess_page", side_effect=lambda img: img), \
          patch("main.surya_engine.extract_blocks", return_value=mock_blocks):
-        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
-            # Models need to be loaded for /ocr to accept requests; force the flag.
-            import main as main_module
-            main_module._models_ready = True
-            try:
-                ocr_response = await client.post("/ocr", json={
-                    "pdfUrl": "http://minio/doc.pdf",
-                    "scriptType": "TYPEWRITER",
-                    "language": "de",
-                })
-                assert ocr_response.status_code == 200, ocr_response.text
+        async with ocr_client() as client:
+            ocr_response = await client.post("/ocr", json={
+                "pdfUrl": "http://minio/doc.pdf",
+                "scriptType": "TYPEWRITER",
+                "language": "de",
+            })
+            assert ocr_response.status_code == 200, ocr_response.text
 
-                metrics_response = await client.get("/metrics")
-            finally:
-                main_module._models_ready = False
+            metrics_response = await client.get("/metrics")
 
     body = metrics_response.text
     assert "http_requests_total" in body
@@ -124,20 +137,13 @@ async def test_ocr_jobs_total_incremented_with_kraken_engine_label_for_kurrent(f
     mock_blocks = [{"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 1.0, "height": 1.0,
                     "polygon": None, "text": "hi", "words": []}]
 
-    with patch("main.kraken_engine.load_models"), \
-         patch("main.load_spell_checker"), \
-         patch("main.correct_text", side_effect=lambda t: t), \
+    with patch("main.correct_text", side_effect=lambda t: t), \
          patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
          patch("main.preprocess_page", side_effect=lambda img: img), \
          patch("main.kraken_engine.is_available", return_value=True), \
          patch("main.kraken_engine.extract_blocks", return_value=mock_blocks):
-        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
-            import main as main_module
-            main_module._models_ready = True
-            try:
-                await _drive_ocr(client, script_type="HANDWRITING_KURRENT")
-            finally:
-                main_module._models_ready = False
+        async with ocr_client() as client:
+            await _drive_ocr(client, script_type="HANDWRITING_KURRENT")
 
     value = fresh_metrics.ocr_jobs_total.labels(
         engine="kraken", script_type="HANDWRITING_KURRENT"
@@ -152,18 +158,11 @@ async def test_ocr_jobs_total_incremented_with_surya_engine_label_for_typewriter
     mock_blocks = [{"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 1.0, "height": 1.0,
                     "polygon": None, "text": "hi", "words": []}]
 
-    with patch("main.kraken_engine.load_models"), \
-         patch("main.load_spell_checker"), \
-         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
+    with patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
          patch("main.preprocess_page", side_effect=lambda img: img), \
          patch("main.surya_engine.extract_blocks", return_value=mock_blocks):
-        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
-            import main as main_module
-            main_module._models_ready = True
-            try:
-                await _drive_ocr(client, script_type="TYPEWRITER")
-            finally:
-                main_module._models_ready = False
+        async with ocr_client() as client:
+            await _drive_ocr(client, script_type="TYPEWRITER")
 
     value = fresh_metrics.ocr_jobs_total.labels(
         engine="surya", script_type="TYPEWRITER"
@@ -178,26 +177,19 @@ async def test_ocr_pages_total_incremented_once_per_page_in_stream(fresh_metrics
     mock_blocks = [{"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 1.0, "height": 1.0,
                     "polygon": None, "text": "hi", "words": []}]
 
-    with patch("main.kraken_engine.load_models"), \
-         patch("main.load_spell_checker"), \
-         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
+    with patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
          patch("main.preprocess_page", side_effect=lambda img: img), \
          patch("main.surya_engine.extract_page_blocks", return_value=mock_blocks):
-        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
-            import main as main_module
-            main_module._models_ready = True
-            try:
-                async with client.stream("POST", "/ocr/stream", json={
-                    "pdfUrl": "http://minio/doc.pdf",
-                    "scriptType": "TYPEWRITER",
-                    "language": "de",
-                }) as response:
-                    assert response.status_code == 200
-                    # Drain the stream so all per-page increments fire.
-                    async for _ in response.aiter_lines():
-                        pass
-            finally:
-                main_module._models_ready = False
+        async with ocr_client() as client:
+            async with client.stream("POST", "/ocr/stream", json={
+                "pdfUrl": "http://minio/doc.pdf",
+                "scriptType": "TYPEWRITER",
+                "language": "de",
+            }) as response:
+                assert response.status_code == 200
+                # Drain the stream so all per-page increments fire.
+                async for _ in response.aiter_lines():
+                    pass
 
     value = fresh_metrics.ocr_pages_total.labels(engine="surya")._value.get()
     assert value == 3.0
@@ -218,28 +210,21 @@ async def test_ocr_skipped_pages_total_incremented_when_engine_raises_for_a_page
             raise RuntimeError("synthetic engine failure")
         return good_blocks
 
-    with patch("main.kraken_engine.load_models"), \
-         patch("main.load_spell_checker"), \
-         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
+    with patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
          patch("main.preprocess_page", side_effect=lambda img: img), \
          patch("main.surya_engine.extract_page_blocks", side_effect=extract_side_effect):
-        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
-            import main as main_module
-            main_module._models_ready = True
-            try:
-                async with client.stream("POST", "/ocr/stream", json={
-                    "pdfUrl": "http://minio/doc.pdf",
-                    "scriptType": "TYPEWRITER",
-                    "language": "de",
-                }) as response:
-                    assert response.status_code == 200
-                    saw_error = False
-                    async for line in response.aiter_lines():
-                        if line and '"type": "error"' in line:
-                            saw_error = True
-                    assert saw_error
-            finally:
-                main_module._models_ready = False
+        async with ocr_client() as client:
+            async with client.stream("POST", "/ocr/stream", json={
+                "pdfUrl": "http://minio/doc.pdf",
+                "scriptType": "TYPEWRITER",
+                "language": "de",
+            }) as response:
+                assert response.status_code == 200
+                saw_error = False
+                async for line in response.aiter_lines():
+                    if line and '"type": "error"' in line:
+                        saw_error = True
+                assert saw_error
 
     assert fresh_metrics.ocr_skipped_pages_total._value.get() == 1.0
     # The second page still succeeds.
@@ -266,18 +251,11 @@ async def test_ocr_words_and_illegible_words_total_sum_across_blocks(fresh_metri
                    {"text": "Karl", "confidence": 0.95}]},
     ]
 
-    with patch("main.kraken_engine.load_models"), \
-         patch("main.load_spell_checker"), \
-         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
+    with patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
          patch("main.preprocess_page", side_effect=lambda img: img), \
          patch("main.surya_engine.extract_blocks", return_value=mock_blocks):
-        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
-            import main as main_module
-            main_module._models_ready = True
-            try:
-                await _drive_ocr(client, script_type="TYPEWRITER")
-            finally:
-                main_module._models_ready = False
+        async with ocr_client() as client:
+            await _drive_ocr(client, script_type="TYPEWRITER")
 
     assert fresh_metrics.ocr_words_total._value.get() == 5.0
     assert fresh_metrics.ocr_illegible_words_total._value.get() == 2.0
@@ -296,25 +274,18 @@ async def test_ocr_processing_seconds_histogram_observed_per_page_in_stream(fres
     mock_blocks = [{"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 1.0, "height": 1.0,
                     "polygon": None, "text": "ok", "words": []}]
 
-    with patch("main.kraken_engine.load_models"), \
-         patch("main.load_spell_checker"), \
-         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
+    with patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
          patch("main.preprocess_page", side_effect=lambda img: img), \
          patch("main.surya_engine.extract_page_blocks", return_value=mock_blocks):
-        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
-            import main as main_module
-            main_module._models_ready = True
-            try:
-                async with client.stream("POST", "/ocr/stream", json={
-                    "pdfUrl": "http://minio/doc.pdf",
-                    "scriptType": "TYPEWRITER",
-                    "language": "de",
-                }) as response:
-                    assert response.status_code == 200
-                    async for _ in response.aiter_lines():
-                        pass
-            finally:
-                main_module._models_ready = False
+        async with ocr_client() as client:
+            async with client.stream("POST", "/ocr/stream", json={
+                "pdfUrl": "http://minio/doc.pdf",
+                "scriptType": "TYPEWRITER",
+                "language": "de",
+            }) as response:
+                assert response.status_code == 200
+                async for _ in response.aiter_lines():
+                    pass
 
     sum_seconds, count = _histogram_count_sum(
         fresh_metrics.ocr_processing_seconds, engine="surya"
@@ -507,26 +478,19 @@ async def test_ocr_processing_seconds_histogram_observed_per_page_in_guided_stre
         {"pageNumber": 2, "x": 0.0, "y": 0.0, "width": 1.0, "height": 1.0, "annotationId": "a2"},
     ]
 
-    with patch("main.kraken_engine.load_models"), \
-         patch("main.load_spell_checker"), \
-         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
+    with patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
          patch("main.preprocess_page", side_effect=lambda img: img), \
          patch("main.surya_engine.extract_region_text", return_value="text"):
-        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
-            import main as main_module
-            main_module._models_ready = True
-            try:
-                async with client.stream("POST", "/ocr/stream", json={
-                    "pdfUrl": "http://minio/doc.pdf",
-                    "scriptType": "TYPEWRITER",
-                    "language": "de",
-                    "regions": regions,
-                }) as response:
-                    assert response.status_code == 200
-                    async for _ in response.aiter_lines():
-                        pass
-            finally:
-                main_module._models_ready = False
+        async with ocr_client() as client:
+            async with client.stream("POST", "/ocr/stream", json={
+                "pdfUrl": "http://minio/doc.pdf",
+                "scriptType": "TYPEWRITER",
+                "language": "de",
+                "regions": regions,
+            }) as response:
+                assert response.status_code == 200
+                async for _ in response.aiter_lines():
+                    pass
 
     sum_seconds, count = _histogram_count_sum(
         fresh_metrics.ocr_processing_seconds, engine="surya"
@@ -549,28 +513,21 @@ async def test_ocr_processing_seconds_histogram_excludes_spell_check_time_in_gui
         _time.sleep(0.05)
         return text
 
-    with patch("main.kraken_engine.load_models"), \
-         patch("main.load_spell_checker"), \
-         patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
+    with patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=mock_images), \
          patch("main.preprocess_page", side_effect=lambda img: img), \
          patch("main.kraken_engine.is_available", return_value=True), \
          patch("main.kraken_engine.extract_region_text", return_value="text"), \
          patch("main.correct_text", side_effect=slow_correct):
-        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
-            import main as main_module
-            main_module._models_ready = True
-            try:
-                async with client.stream("POST", "/ocr/stream", json={
-                    "pdfUrl": "http://minio/doc.pdf",
-                    "scriptType": "HANDWRITING_KURRENT",
-                    "language": "de",
-                    "regions": regions,
-                }) as response:
-                    assert response.status_code == 200
-                    async for _ in response.aiter_lines():
-                        pass
-            finally:
-                main_module._models_ready = False
+        async with ocr_client() as client:
+            async with client.stream("POST", "/ocr/stream", json={
+                "pdfUrl": "http://minio/doc.pdf",
+                "scriptType": "HANDWRITING_KURRENT",
+                "language": "de",
+                "regions": regions,
+            }) as response:
+                assert response.status_code == 200
+                async for _ in response.aiter_lines():
+                    pass
 
     sum_seconds, _ = _histogram_count_sum(
         fresh_metrics.ocr_processing_seconds, engine="kraken"
@@ -591,21 +548,13 @@ async def test_ocr_jobs_total_not_incremented_when_pdf_download_fails_in_stream(
     async def fail_download(url):
         raise RuntimeError("synthetic download failure")
 
-    with patch("main.kraken_engine.load_models"), \
-         patch("main.load_spell_checker"), \
-         patch("main._download_and_convert_pdf", new=fail_download):
-        transport = ASGITransport(app=app, raise_app_exceptions=False)
-        async with AsyncClient(transport=transport, base_url="http://test") as client:
-            import main as main_module
-            main_module._models_ready = True
-            try:
-                response = await client.post("/ocr/stream", json={
-                    "pdfUrl": "http://minio/doc.pdf",
-                    "scriptType": "TYPEWRITER",
-                    "language": "de",
-                })
-            finally:
-                main_module._models_ready = False
+    with patch("main._download_and_convert_pdf", new=fail_download):
+        async with ocr_client(raise_app_exceptions=False) as client:
+            response = await client.post("/ocr/stream", json={
+                "pdfUrl": "http://minio/doc.pdf",
+                "scriptType": "TYPEWRITER",
+                "language": "de",
+            })
 
     assert response.status_code == 500
     assert fresh_metrics.ocr_jobs_total.labels(
-- 
2.49.1


From 67368b4413389797c3de08fa26e077e2d9bf17cb Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 17:04:28 +0200
Subject: [PATCH 23/27] docs(ocr): annotate metrics binding + /metrics exposure
 + pin client

Three small drops that pay back later:
- Note that main.metrics is import-time bound and tests must
  monkeypatch `main.metrics`, not the registry.
- Flag the /metrics endpoint as unauthenticated and cross-link the
  Caddy-block snippet in docs/OBSERVABILITY.md.
- Pin prometheus-client to the exact 0.25.0 patch version already
  resolved by prometheus-fastapi-instrumentator 7.0.0, so an upstream
  bump cannot silently slip in.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ocr-service/main.py          | 7 +++++++
 ocr-service/requirements.txt | 1 +
 2 files changed, 8 insertions(+)

diff --git a/ocr-service/main.py b/ocr-service/main.py
index 489fd98e..bde3fb2f 100644
--- a/ocr-service/main.py
+++ b/ocr-service/main.py
@@ -43,6 +43,10 @@ logger = logging.getLogger(__name__)
 
 _models_ready = False
 
+# One-shot import-time binding to the default REGISTRY. Tests that need a
+# clean counter state must monkeypatch `main.metrics` with a container built
+# from a fresh CollectorRegistry — rebinding through the registry directly
+# will not retarget the references stored in the OcrMetrics dataclass.
 metrics: OcrMetrics = build_metrics(REGISTRY)
 
 ALLOWED_PDF_HOSTS = set(
@@ -117,6 +121,9 @@ async def lifespan(app: FastAPI):
 
 app = FastAPI(title="Familienarchiv OCR Service", lifespan=lifespan)
 
+# /metrics is unauthenticated — relies on Docker-internal-network exposure
+# only (CWE-200 risk if `ports:` ever maps 8000 to host). See
+# docs/OBSERVABILITY.md §Internal-only endpoints for the Caddy block snippet.
 Instrumentator(excluded_handlers=["/health", "/metrics"]).instrument(app).expose(app)
 
 
diff --git a/ocr-service/requirements.txt b/ocr-service/requirements.txt
index 8a9bedec..39c492cc 100644
--- a/ocr-service/requirements.txt
+++ b/ocr-service/requirements.txt
@@ -11,3 +11,4 @@ httpx==0.28.1
 pyspellchecker==0.9.0
 opencv-python-headless==4.11.0.86
 prometheus-fastapi-instrumentator==7.0.0
+prometheus-client==0.25.0
-- 
2.49.1


From 2dbb3c37b45d632922bedae61a3b04d7c18888c2 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 17:05:27 +0200
Subject: [PATCH 24/27] docs(observability): document ocr metrics, scrape edge,
 and access-log filter

- L2 container diagram now shows the Prometheus -> ocr:8000 scrape edge
  (plus the previously-undrawn Prometheus -> backend edge for symmetry).
- OBSERVABILITY.md gains a full ocr_* metrics table with labels, units,
  and the canonical example queries from issue #652.
- New "Internal-only endpoints" subsection captures the unauthenticated
  /metrics caveat and provides the Caddy block snippet for the case
  where the service ever gets a host port.
- Explicit note that MetricsPathFilter only quiets uvicorn stdout, and
  the OCR metrics must never carry PII or document content.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 docs/OBSERVABILITY.md                   | 69 ++++++++++++++++++++++++-
 docs/architecture/c4/l2-containers.puml |  2 +
 2 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/docs/OBSERVABILITY.md b/docs/OBSERVABILITY.md
index b895e849..2b3855d6 100644
--- a/docs/OBSERVABILITY.md
+++ b/docs/OBSERVABILITY.md
@@ -118,11 +118,14 @@ To find a trace for a specific request in staging/production, either increase th
 
 ## Metrics (Prometheus → Grafana)
 
-Prometheus scrapes the backend management endpoint every 15 s:
+Prometheus scrapes two targets every 15 s:
 
 ```
 Target: backend:8081/actuator/prometheus
 Labels: job="spring-boot", application="Familienarchiv"
+
+Target: ocr:8000/metrics
+Labels: job="ocr-service"
 ```
 
 All Spring Boot metrics carry the `application="Familienarchiv"` tag, which is how the Grafana Spring Boot Observability dashboard (ID 17175) filters to this service.
@@ -146,6 +149,70 @@ jvm_memory_used_bytes{area="heap", application="Familienarchiv"}
 hikaricp_connections_active
 ```
 
+### OCR-service custom metrics
+
+Exposed at `ocr:8000/metrics` by `prometheus-fastapi-instrumentator`. The
+`http_*` metrics describe the FastAPI request layer; the `ocr_*` series are
+domain-specific. **Never label these with PII or document content** — labels
+have unbounded cardinality risk and are visible to anyone with Grafana access.
+
+| Metric | Type | Labels | Unit | What it tracks |
+|---|---|---|---|---|
+| `ocr_jobs_total` | Counter | `engine` (`surya`/`kraken`), `script_type` | jobs | OCR jobs that started after a successful PDF download |
+| `ocr_pages_total` | Counter | `engine` | pages | Successfully OCR'd pages in the streaming generator |
+| `ocr_skipped_pages_total` | Counter | — | pages | Pages skipped because the engine raised on them |
+| `ocr_words_total` | Counter | — | words | Recognized words summed across every block |
+| `ocr_illegible_words_total` | Counter | — | words | Words below the confidence threshold (rendered as `[unleserlich]`) |
+| `ocr_processing_seconds` | Histogram | `engine` | seconds | Per-page (stream) or per-document (`/ocr`) engine time, excluding preprocessing |
+| `ocr_training_runs_total` | Counter | `kind` (`recognition`/`segmentation`), `outcome` (`success`/`error`) | runs | Completed training runs |
+| `ocr_model_accuracy` | Gauge | `kind` | ratio (0–1) | Latest accuracy reported by a successful training run |
+| `ocr_models_ready` | Gauge | — | 0\|1 | 1 once the lifespan startup has finished loading models |
+
+Canonical example queries (the same ones referenced in issue #652):
+
+```promql
+# OCR throughput by engine
+sum by (engine) (rate(ocr_pages_total[5m]))
+
+# Share of words rendered as [unleserlich]
+sum(rate(ocr_illegible_words_total[5m]))
+  / sum(rate(ocr_words_total[5m]))
+
+# p95 page processing time per engine
+histogram_quantile(0.95, sum by (engine, le) (
+  rate(ocr_processing_seconds_bucket[5m])
+))
+
+# Training error rate
+sum(rate(ocr_training_runs_total{outcome="error"}[1h]))
+  / sum(rate(ocr_training_runs_total[1h]))
+
+# Latest recognition vs segmentation accuracy
+ocr_model_accuracy
+```
+
+### Internal-only endpoints
+
+`/metrics` is exposed by the OCR service over plain HTTP without
+authentication. The container is reachable only on the internal Docker
+network — Caddy never proxies to it directly. If the service is ever
+exposed (e.g. a `ports:` mapping is added), block the endpoint at the
+reverse proxy:
+
+```caddy
+ocr.example.com {
+    @internal_only path /metrics /health
+    respond @internal_only 404
+    reverse_proxy ocr:8000
+}
+```
+
+The `MetricsPathFilter` in `ocr-service/main.py` suppresses uvicorn's
+**stdout** access log lines for `/metrics` and `/health` so the container
+console stays focused on real OCR traffic. Promtail/Loki still receive
+access lines from any other source. Treat the filter as console
+noise-control, not an audit-suppression mechanism.
+
 ## Errors (GlitchTip)
 
 GlitchTip receives errors from both the backend (via Sentry Java SDK) and the frontend (via Sentry JavaScript SDK). It groups events by fingerprint, tracks first/last seen times, and links to the release that introduced the error.
diff --git a/docs/architecture/c4/l2-containers.puml b/docs/architecture/c4/l2-containers.puml
index 346efe75..8d66a614 100644
--- a/docs/architecture/c4/l2-containers.puml
+++ b/docs/architecture/c4/l2-containers.puml
@@ -43,6 +43,8 @@ Rel(ocr, storage, "Fetches PDF via presigned URL", "HTTP / S3 presigned")
 Rel(mc, storage, "Bootstraps bucket + service account on startup", "MinIO Client CLI")
 Rel(promtail, loki, "Pushes log streams", "HTTP/Loki push API")
 Rel(backend, tempo, "Sends distributed traces via OTLP", "HTTP / OTLP / port 4318 (archiv-net)")
+Rel(prometheus, backend, "Scrapes JVM + HTTP metrics", "HTTP 8081 /actuator/prometheus")
+Rel(prometheus, ocr, "Scrapes OCR + http_* metrics", "HTTP 8000 /metrics")
 Rel(grafana, prometheus, "Queries metrics", "HTTP 9090")
 Rel(grafana, loki, "Queries logs", "HTTP 3100")
 Rel(grafana, tempo, "Queries traces", "HTTP 3200")
-- 
2.49.1


From 2df71beb7eac48469b3b963e68d64f4aa693a9c3 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 17:06:44 +0200
Subject: [PATCH 25/27] docs: add ADR-023 and glossary entries for OCR metrics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ADR-023 captures why prometheus-fastapi-instrumentator was chosen,
the build_metrics(registry) factory pattern, and the test rebinding
seam. The glossary gains four ops-aligned terms — illegible word,
models-ready gauge, recognition vs segmentation accuracy — so the
metrics documentation in OBSERVABILITY.md has a vocabulary to lean on.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 docs/GLOSSARY.md                              |  8 ++
 ...mentator-and-metrics-registry-injection.md | 94 +++++++++++++++++++
 2 files changed, 102 insertions(+)
 create mode 100644 docs/adr/023-prometheus-instrumentator-and-metrics-registry-injection.md

diff --git a/docs/GLOSSARY.md b/docs/GLOSSARY.md
index 21addb4c..99da1775 100644
--- a/docs/GLOSSARY.md
+++ b/docs/GLOSSARY.md
@@ -80,6 +80,14 @@ _See also [DocumentStatus lifecycle](#documentstatus-lifecycle)._
 
 **Sütterlin** — A specific standardized style of Kurrent taught in German schools from 1915 to 1941.
 
+**Illegible word** — a word whose recognition confidence falls below the configured threshold; replaced with the literal token `[unleserlich]` in the rendered block text and counted in the `ocr_illegible_words_total` Prometheus counter.
+
+**Models-ready gauge** — the `ocr_models_ready` Prometheus gauge, flipped from `0` to `1` once the FastAPI lifespan startup has finished loading the Kraken model and the spell-checker. Used both for the `/health` endpoint and as the supervised signal for the `ocr_models_ready < 1 for 2m` alert.
+
+**Recognition model accuracy** — the accuracy reported by `ketos train` for the recognition (text-line) model, exposed as `ocr_model_accuracy{kind="recognition"}`. Sourced from `_parse_best_checkpoint` on the highest-scoring checkpoint after training.
+
+**Segmentation model accuracy** — the accuracy reported by `ketos segtrain` for the baseline layout analysis (`blla`) model, exposed as `ocr_model_accuracy{kind="segmentation"}`. Distinct from recognition accuracy because the two models are trained and improved independently.
+
 ---
 
 ## Other Domain Terms
diff --git a/docs/adr/023-prometheus-instrumentator-and-metrics-registry-injection.md b/docs/adr/023-prometheus-instrumentator-and-metrics-registry-injection.md
new file mode 100644
index 00000000..5e8a1020
--- /dev/null
+++ b/docs/adr/023-prometheus-instrumentator-and-metrics-registry-injection.md
@@ -0,0 +1,94 @@
+# ADR-023: Prometheus Instrumentator and Metrics Registry Injection
+
+## Status
+
+Accepted
+
+## Context
+
+Until issue #652 the OCR service exposed no `/metrics` endpoint. The
+observability stack already scrapes the Spring Boot backend's actuator
+endpoint, but it had nothing to scrape on the Python side. Without HTTP-
+and domain-level metrics from `ocr-service` we cannot answer questions
+like "what is the share of words rendered as `[unleserlich]`" or
+"is the training error rate above its budget" from Grafana.
+
+Two implementation requirements influenced the design:
+
+1. **Counter / gauge isolation in tests.** `prometheus_client` collectors
+   are module-level singletons keyed by name on the global `REGISTRY`.
+   Re-importing or naively re-instantiating them raises a duplicated-
+   collector error and cross-test state leaks (a `.inc()` in test A is
+   still readable by test B). A test harness needs a way to swap the
+   active container for a fresh per-test instance.
+
+2. **Minimal blast radius on the request path.** We did not want to
+   hand-instrument every endpoint with FastAPI middleware. The
+   `prometheus-fastapi-instrumentator` library already provides
+   `http_requests_total`, `http_request_duration_seconds`, and the
+   `/metrics` exposition route, all idiomatic Prometheus names.
+
+## Decision
+
+- Add `prometheus-fastapi-instrumentator==7.0.0` and pin its transitive
+  dependency `prometheus-client==0.25.0` explicitly in
+  `ocr-service/requirements.txt`.
+- Mount the instrumentator once at module load:
+  `Instrumentator(excluded_handlers=["/health", "/metrics"]).instrument(app).expose(app)`.
+  This adds `/metrics` and an HTTP-level dashboard surface without
+  changing any endpoint code.
+- Define every domain metric (`ocr_jobs_total`, `ocr_pages_total`,
+  `ocr_processing_seconds`, …) inside a `build_metrics(registry)`
+  factory in `ocr-service/metrics.py` that returns a frozen `OcrMetrics`
+  dataclass. Production code binds the container to the default
+  `REGISTRY` once: `metrics: OcrMetrics = build_metrics(REGISTRY)`.
+- Tests use a `fresh_metrics` fixture that builds a new
+  `CollectorRegistry()` per test and monkeypatches `main.metrics` with
+  a container bound to it. The endpoint code keeps reading
+  `metrics.<name>` without knowing whether it is talking to the global
+  registry or a per-test one.
+
+## Consequences
+
+**Positive**
+
+- One reusable factory captures the metric definitions; future metrics
+  go in one place.
+- Tests run with full counter isolation. Cross-test state leakage is
+  impossible because each test sees its own dataclass instance.
+- The instrumentator gives us `http_*` metrics for free, including a
+  Grafana-ready histogram that pairs with the Spring Boot one.
+
+**Negative**
+
+- One extra level of indirection: any test that asserts on metric
+  values must remember to monkeypatch `main.metrics`, not the registry
+  directly. Rebinding through the registry is harmless but useless —
+  the dataclass holds references to the original collectors.
+- `prometheus-client` is now pinned. Upgrading it requires an explicit
+  bump and re-checking the instrumentator's compatibility range.
+- `/metrics` is exposed unauthenticated and relies on the Docker
+  internal network for confidentiality. See
+  [docs/OBSERVABILITY.md §Internal-only endpoints](../OBSERVABILITY.md)
+  for the Caddy snippet that must be added if the service ever gets a
+  host-side port mapping.
+
+## Alternatives considered
+
+- **Hand-roll the `/metrics` endpoint.** Rejected: would have meant
+  duplicating what `prometheus-fastapi-instrumentator` ships, plus
+  middleware for the HTTP histograms.
+- **Skip the factory; pass `registry` as a function argument
+  everywhere.** Rejected: clutters every endpoint signature and breaks
+  the symmetry with the Spring Boot side, which also relies on a
+  process-global Micrometer registry.
+- **Use a `pytest` autouse fixture that resets `REGISTRY` between
+  tests.** Rejected: `prometheus_client` does not expose a clean
+  "unregister all" hook, and we would be relying on private APIs.
+
+## References
+
+- Issue: [#652](https://git.raddatz.cloud/marcel/familienarchiv/issues/652)
+- Library: <https://github.com/trallnag/prometheus-fastapi-instrumentator>
+- Code: `ocr-service/metrics.py`, `ocr-service/main.py`,
+  `ocr-service/test_metrics.py`
-- 
2.49.1


From e0e1578bdd0e0088cedc2ca28a679c592a9b9b8a Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 17:22:49 +0200
Subject: [PATCH 26/27] test(ocr): widen spell-check exclusion bound to 0.09s
 with rationale
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sara's cycle-2 S1: the wall-clock assertion at < 0.05s could trip on a
slow CI runner under load even when the timer correctly excludes
spell-check. Sara's preferred structural fix (patch main.time.monotonic
with a deterministic sequence) proved awkward — the patched attribute is
the *global* time.monotonic which httpx and asyncio consume, exhausting
the sequence before the request reaches the engine loop.

Take the documented fallback: widen the bound to 0.09s and explain why.
The failure mode the test guards against (spell-check inside the timer)
would add 0.1s (2 × 0.05s sleep), so 0.09s catches the bug while leaving
~90ms of headroom for slow CI runners. Verified red→green by temporarily
moving correct_text inside the timer block: bound trips at 0.101s; the
fixed code reads ~0.001s.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 ocr-service/test_metrics.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index 253cae14..d2bd9671 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -501,7 +501,17 @@ async def test_ocr_processing_seconds_histogram_observed_per_page_in_guided_stre
 
 @pytest.mark.asyncio
 async def test_ocr_processing_seconds_histogram_excludes_spell_check_time_in_guided_stream(fresh_metrics):
-    """The guided observation must time engine work only, not the spell-check pass."""
+    """The guided observation must time engine work only, not the spell-check pass.
+
+    Wall-clock bound rather than a structural `patch("main.time.monotonic")`:
+    the patched attribute is the *global* `time.monotonic`, which httpx and
+    asyncio also consume — they exhaust the deterministic sequence before the
+    request reaches the engine loop. Bound is sized against the failure mode,
+    not the noise floor: spell-check sleeps 0.05s × 2 regions = 0.1s, so a
+    timer that accidentally wrapped `correct_text` would observe >= 0.1s. The
+    0.09s ceiling catches that bug while leaving ~90ms of slack for slow CI
+    runners (engine work is instantaneous under the mock).
+    """
     mock_images = [Image.new("RGB", (100, 100))]
     regions = [
         {"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 0.5, "height": 0.5, "annotationId": "a1"},
@@ -532,10 +542,7 @@ async def test_ocr_processing_seconds_histogram_excludes_spell_check_time_in_gui
     sum_seconds, _ = _histogram_count_sum(
         fresh_metrics.ocr_processing_seconds, engine="kraken"
     )
-    # Spell-check sleeps 0.05s per region × 2 regions = 0.1s; engine work is instantaneous.
-    # If timing included spell-check, sum_seconds would be >= 0.1s. Allow 30ms slack
-    # for scheduler overhead.
-    assert sum_seconds < 0.05, f"timing must exclude spell-check; got sum={sum_seconds}"
+    assert sum_seconds < 0.09, f"timing must exclude spell-check; got sum={sum_seconds}"
 
 
 @pytest.mark.asyncio
-- 
2.49.1


From 0801da8df0a4d81fdbd0da32b3fbdb8a3ea722f6 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 17:23:32 +0200
Subject: [PATCH 27/27] docs(ocr): explain why two metrics tests skip
 fresh_metrics fixture
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sara's cycle-2 S2: clarify the latent (but not actual) cross-test state
risk on the two metrics tests that hit the global REGISTRY instead of
the per-test fresh_metrics fixture. Migrating them would actually break
them — the /metrics endpoint is served by prometheus-fastapi-instrumentator
which binds to the default REGISTRY at app-construction time, and the
http_requests_total assertion only finds counters on that global
registry. Both tests already assert response shape only (status code,
content-type substring, body substrings), not numeric values, so the
shared-registry caveat is documented for future readers rather than
treated as a bug to fix.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 ocr-service/test_metrics.py | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index d2bd9671..42afd275 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -61,7 +61,15 @@ def fresh_metrics(monkeypatch):
 
 @pytest.mark.asyncio
 async def test_metrics_endpoint_returns_200():
-    """`GET /metrics` returns 200 with Prometheus exposition content."""
+    """`GET /metrics` returns 200 with Prometheus exposition content.
+
+    Uses the global REGISTRY by design — does NOT take the `fresh_metrics` fixture.
+    The `/metrics` endpoint is wired by `prometheus-fastapi-instrumentator`, which
+    binds to the default REGISTRY at app-construction time; swapping `main.metrics`
+    via the fixture would not redirect what `/metrics` exposes. This test only
+    asserts response shape (status code + content-type substring), not numeric
+    counter values, so cross-test state leakage cannot affect it.
+    """
     with patch("main.kraken_engine.load_models"), \
          patch("main.load_spell_checker"):
         async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
@@ -73,7 +81,15 @@ async def test_metrics_endpoint_returns_200():
 
 @pytest.mark.asyncio
 async def test_metrics_includes_http_request_metrics_after_ocr_call():
-    """After a request to /ocr, `/metrics` exposes auto-instrumented http_* metrics."""
+    """After a request to /ocr, `/metrics` exposes auto-instrumented http_* metrics.
+
+    Uses the global REGISTRY by design — does NOT take the `fresh_metrics` fixture.
+    The `http_requests_total` / `http_request_duration_seconds` metrics live on
+    the instrumentator's default REGISTRY (not on `main.metrics`), so a fresh
+    CollectorRegistry would never see them. This test only asserts response shape
+    (substring presence in the exposition body), not numeric counter values, so
+    cross-test state leakage cannot affect it.
+    """
     mock_images = [Image.new("RGB", (100, 100))]
     mock_blocks = [{"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 1.0, "height": 1.0,
                     "polygon": None, "text": "hi", "words": []}]
-- 
2.49.1