test(ocr): assert ocr_jobs_total label is engine=surya for typewriter

Locks down AC2 for the non-Kurrent path. The same code branch in /ocr that
sets engine_name from script_type now has explicit coverage for both
HANDWRITING_KURRENT → kraken and TYPEWRITER → surya.

Refs #652 (AC2)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-21 16:04:20 +02:00
parent 696b71da5a
commit 52d8dc2b20

View File

@@ -129,3 +129,29 @@ async def test_ocr_jobs_total_incremented_with_kraken_engine_label_for_kurrent(f
engine="kraken", script_type="HANDWRITING_KURRENT" engine="kraken", script_type="HANDWRITING_KURRENT"
)._value.get() )._value.get()
assert value == 1.0 assert value == 1.0
@pytest.mark.asyncio
async def test_ocr_jobs_total_incremented_with_surya_engine_label_for_typewriter(fresh_metrics):
    """Verify that a TYPEWRITER /ocr request is counted under engine=surya.

    The model loaders, the PDF download, the page preprocessing and the Surya
    block extraction are all patched, so the test supplies a single in-memory
    page and a single canned text block instead of doing real OCR work.
    """
    import main as main_module

    pages = [Image.new("RGB", (100, 100))]
    page_block = {
        "pageNumber": 1,
        "x": 0.0,
        "y": 0.0,
        "width": 1.0,
        "height": 1.0,
        "polygon": None,
        "text": "hi",
        "words": [],
    }
    blocks = [page_block]

    with patch("main.kraken_engine.load_models"), \
            patch("main.load_spell_checker"), \
            patch("main._download_and_convert_pdf", new_callable=AsyncMock, return_value=pages), \
            patch("main.preprocess_page", side_effect=lambda img: img), \
            patch("main.surya_engine.extract_blocks", return_value=blocks):
        async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as client:
            # Switch the module's _models_ready flag on only while the request
            # is driven; the finally clause resets it to False even on failure.
            main_module._models_ready = True
            try:
                await _drive_ocr(client, script_type="TYPEWRITER")
            finally:
                main_module._models_ready = False

    # Read the counter child for exactly this label pair; one request must
    # have produced exactly one increment.
    surya_counter = fresh_metrics.ocr_jobs_total.labels(
        engine="surya", script_type="TYPEWRITER"
    )
    value = surya_counter._value.get()
    assert value == 1.0