From e0e1578bdd0e0088cedc2ca28a679c592a9b9b8a Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 21 May 2026 17:22:49 +0200 Subject: [PATCH] test(ocr): widen spell-check exclusion bound to 0.09s with rationale MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sara's cycle-2 S1: the wall-clock assertion at < 0.05s could trip on a slow CI runner under load even when the timer correctly excludes spell-check. Sara's preferred structural fix (patch main.time.monotonic with a deterministic sequence) proved awkward — the patched attribute is the *global* time.monotonic, which httpx and asyncio also consume, exhausting the sequence before the request reaches the engine loop. Take the documented fallback: widen the bound to 0.09s and explain why. The failure mode the test guards against (spell-check inside the timer) would add 0.1s (2 × 0.05s sleep), so 0.09s catches the bug while leaving ~90ms of headroom for slow CI runners. Verified red→green by temporarily moving correct_text inside the timer block: bound trips at 0.101s; the fixed code reads ~0.001s. Co-Authored-By: Claude Opus 4.7 --- ocr-service/test_metrics.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py index 253cae14..d2bd9671 100644 --- a/ocr-service/test_metrics.py +++ b/ocr-service/test_metrics.py @@ -501,7 +501,17 @@ async def test_ocr_processing_seconds_histogram_observed_per_page_in_guided_stre @pytest.mark.asyncio async def test_ocr_processing_seconds_histogram_excludes_spell_check_time_in_guided_stream(fresh_metrics): - """The guided observation must time engine work only, not the spell-check pass.""" + """The guided observation must time engine work only, not the spell-check pass. 
+ + Wall-clock bound rather than a structural `patch("main.time.monotonic")`: + the patched attribute is the *global* `time.monotonic`, which httpx and + asyncio also consume — they exhaust the deterministic sequence before the + request reaches the engine loop. Bound is sized against the failure mode, + not the noise floor: spell-check sleeps 0.05s × 2 regions = 0.1s, so a + timer that accidentally wrapped `correct_text` would observe >= 0.1s. The + 0.09s ceiling catches that bug while leaving ~90ms of slack for slow CI + runners (engine work is instantaneous under the mock). + """ mock_images = [Image.new("RGB", (100, 100))] regions = [ {"pageNumber": 1, "x": 0.0, "y": 0.0, "width": 0.5, "height": 0.5, "annotationId": "a1"}, @@ -532,10 +542,7 @@ async def test_ocr_processing_seconds_histogram_excludes_spell_check_time_in_gui sum_seconds, _ = _histogram_count_sum( fresh_metrics.ocr_processing_seconds, engine="kraken" ) - # Spell-check sleeps 0.05s per region × 2 regions = 0.1s; engine work is instantaneous. - # If timing included spell-check, sum_seconds would be >= 0.1s. Allow 30ms slack - # for scheduler overhead. - assert sum_seconds < 0.05, f"timing must exclude spell-check; got sum={sum_seconds}" + assert sum_seconds < 0.09, f"timing must exclude spell-check; got sum={sum_seconds}" @pytest.mark.asyncio