From e0e1578bdd0e0088cedc2ca28a679c592a9b9b8a Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Thu, 21 May 2026 17:22:49 +0200
Subject: [PATCH] test(ocr): widen spell-check exclusion bound to 0.09s with
 rationale
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sara's cycle-2 S1: the wall-clock assertion at < 0.05s could trip on a
slow CI runner under load even when the timer correctly excludes
spell-check. Sara's preferred structural fix (patch main.time.monotonic
with a deterministic sequence) proved awkward: the patched attribute is
the *global* time.monotonic, which httpx and asyncio also call, so they
exhaust the sequence before the request reaches the engine loop.

Take the documented fallback: widen the bound to 0.09s and explain why.
The failure mode the test guards against (spell-check inside the timer)
would add at least 0.1s (2 × 0.05s sleep), so 0.09s still catches the
bug while leaving ~90ms of headroom over the ~0.001s baseline for slow
CI runners. Verified red→green by temporarily moving correct_text inside
the timer block: bound trips at 0.101s; the fixed code reads ~0.001s.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 ocr-service/test_metrics.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index 253cae14..d2bd9671 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -501,7 +501,17 @@ async def test_ocr_processing_seconds_histogram_observed_per_page_in_guided_stre
 
 @pytest.mark.asyncio
 async def test_ocr_processing_seconds_histogram_excludes_spell_check_time_in_guided_stream(fresh_metrics):
-    """The guided observation must time engine work only, not the spell-check pass."""
+    """The guided observation must time engine work only, not the spell-check pass.
+
+    Wall-clock bound rather than a structural `patch("main.time.monotonic")`:
+    the patched attribute is the *global* `time.monotonic`, which httpx and
+    asyncio also consume — they exhaust the deterministic sequence before the
+    request reaches the engine loop. Bound is sized against the failure mode,
+    not the noise floor: spell-check sleeps 0.05s × 2 regions = 0.1s, so a
+    timer that accidentally wrapped `correct_text` would observe >= 0.1s. The
+    0.09s ceiling catches that bug while leaving ~90ms of slack for slow CI
+    runners (engine work is instantaneous under the mock).
+    """
     mock_images = [Image.new("RGB", (100, 100))]
     regions = [
         {"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 0.5, "height": 0.5, "annotationId": "a1"},
@@ -532,10 +542,7 @@ async def test_ocr_processing_seconds_histogram_excludes_spell_check_time_in_gui
     sum_seconds, _ = _histogram_count_sum(
         fresh_metrics.ocr_processing_seconds, engine="kraken"
     )
-    # Spell-check sleeps 0.05s per region × 2 regions = 0.1s; engine work is instantaneous.
-    # If timing included spell-check, sum_seconds would be >= 0.1s. Allow 30ms slack
-    # for scheduler overhead.
-    assert sum_seconds < 0.05, f"timing must exclude spell-check; got sum={sum_seconds}"
+    assert sum_seconds < 0.09, f"timing must exclude spell-check; got sum={sum_seconds}"
 
 
 @pytest.mark.asyncio