From ebaedb1af0107025847dcd09ce3c10fe177bfeb0 Mon Sep 17 00:00:00 2001
From: Marcel
Date: Thu, 21 May 2026 16:51:23 +0200
Subject: [PATCH] test(ocr): assert ocr_jobs_total stays zero when stream download fails

Locks in the post-download placement of the counter increment so a
regression that moves it back above _download_and_convert_pdf would fail.

Co-Authored-By: Claude Sonnet 4.6
---
 ocr-service/test_metrics.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/ocr-service/test_metrics.py b/ocr-service/test_metrics.py
index 271b57d8..783e1d25 100644
--- a/ocr-service/test_metrics.py
+++ b/ocr-service/test_metrics.py
@@ -444,6 +444,38 @@ async def test_ocr_models_ready_gauge_is_one_after_lifespan_startup(fresh_metric
     assert fresh_metrics.ocr_models_ready._value.get() == 1.0
 
 
+@pytest.mark.asyncio
+async def test_ocr_jobs_total_not_incremented_when_pdf_download_fails_in_stream(fresh_metrics):
+    """If `_download_and_convert_pdf` raises, ocr_jobs_total is NOT incremented.
+
+    Mirrors the /ocr endpoint's semantics: the counter only records jobs that
+    actually started OCR work, not failed downloads.
+    """
+    async def fail_download(url):  # patched in for main._download_and_convert_pdf below
+        raise RuntimeError("synthetic download failure")
+
+    with patch("main.kraken_engine.load_models"), \
+         patch("main.load_spell_checker"), \
+         patch("main._download_and_convert_pdf", new=fail_download):
+        transport = ASGITransport(app=app, raise_app_exceptions=False)  # app errors become a 500 response
+        async with AsyncClient(transport=transport, base_url="http://test") as client:
+            import main as main_module
+            main_module._models_ready = True  # set for this request only; reset in `finally`
+            try:
+                response = await client.post("/ocr/stream", json={
+                    "pdfUrl": "http://minio/doc.pdf",  # the patched download raises instead of fetching this
+                    "scriptType": "TYPEWRITER",
+                    "language": "de",
+                })
+            finally:
+                main_module._models_ready = False  # reset even if the request itself raised
+
+    assert response.status_code == 500
+    assert fresh_metrics.ocr_jobs_total.labels(
+        engine="surya", script_type="TYPEWRITER"  # NOTE(review): assumes /ocr/stream uses this label pair - confirm
+    )._value.get() == 0.0
+
+
 def test_uvicorn_access_log_filter_skips_metrics_path():
     """The MetricsPathFilter drops uvicorn.access log records that target /metrics."""
     import logging as _logging